Repository: microsoft/D3D12TranslationLayer Branch: master Commit: c59bfe7079bf Files: 108 Total size: 2.0 MB Directory structure: gitextract_j6n7_r9v/ ├── CMakeLists.txt ├── CONTRIBUTING.md ├── DxbcParser/ │ ├── CMakeLists.txt │ ├── include/ │ │ ├── BlobContainer.h │ │ ├── DXBCUtils.h │ │ └── pch.h │ └── src/ │ ├── BlobContainer.cpp │ └── DXBCUtils.cpp ├── LICENSE ├── README.md ├── SECURITY.md ├── external/ │ ├── MicrosoftTelemetry.h │ ├── d3d12compatibility.h │ └── d3dx12.h ├── include/ │ ├── Allocator.h │ ├── BatchedContext.hpp │ ├── BatchedQuery.hpp │ ├── BatchedResource.hpp │ ├── BlitHelper.hpp │ ├── BlitHelperShaders.h │ ├── BlockAllocators.h │ ├── BlockAllocators.inl │ ├── CommandListManager.hpp │ ├── D3D12TranslationLayerDependencyIncludes.h │ ├── D3D12TranslationLayerIncludes.h │ ├── DXGIColorSpaceHelper.h │ ├── DeviceChild.hpp │ ├── DxbcBuilder.hpp │ ├── Fence.hpp │ ├── FormatDesc.hpp │ ├── ImmediateContext.hpp │ ├── ImmediateContext.inl │ ├── MaxFrameLatencyHelper.hpp │ ├── PipelineState.hpp │ ├── PrecompiledShaders.h │ ├── Query.hpp │ ├── Residency.h │ ├── Resource.hpp │ ├── ResourceBinding.hpp │ ├── ResourceCache.hpp │ ├── ResourceState.hpp │ ├── RootSignature.hpp │ ├── Sampler.hpp │ ├── Sampler.inl │ ├── Shader.hpp │ ├── Shader.inl │ ├── ShaderBinary.h │ ├── SharedResourceHelpers.hpp │ ├── SubresourceHelpers.hpp │ ├── SwapChainHelper.hpp │ ├── SwapChainManager.hpp │ ├── ThreadPool.hpp │ ├── Util.hpp │ ├── VideoDecode.hpp │ ├── VideoDecodeStatistics.hpp │ ├── VideoDevice.hpp │ ├── VideoProcess.hpp │ ├── VideoProcessEnum.hpp │ ├── VideoProcessShaders.h │ ├── VideoReferenceDataManager.hpp │ ├── VideoViewHelper.hpp │ ├── View.hpp │ ├── View.inl │ ├── XPlatHelpers.h │ ├── commandlistmanager.inl │ ├── pch.h │ └── segmented_stack.h ├── packages.config ├── scripts/ │ ├── BlitHelperShaders.hlsl │ ├── CompileBlitHelperShaders.cmd │ ├── CompileVideoProcessShaders.cmd │ └── DeinterlaceShader.hlsl └── src/ ├── Allocator.cpp ├── BatchedContext.cpp ├── 
BlitHelper.cpp ├── CMakeLists.txt ├── ColorConvertHelper.cpp ├── CommandListManager.cpp ├── DeviceChild.cpp ├── DxbcBuilder.cpp ├── Fence.cpp ├── FormatDescImpl.cpp ├── ImmediateContext.cpp ├── Main.cpp ├── MaxFrameLatencyHelper.cpp ├── PipelineState.cpp ├── Query.cpp ├── Residency.cpp ├── Resource.cpp ├── ResourceBinding.cpp ├── ResourceCache.cpp ├── ResourceState.cpp ├── RootSignature.cpp ├── Shader.cpp ├── ShaderBinary.cpp ├── ShaderParser.cpp ├── SharedResourceHelpers.cpp ├── SubresourceHelpers.cpp ├── SwapChainHelper.cpp ├── SwapChainManager.cpp ├── Util.cpp ├── VideoDecode.cpp ├── VideoDecodeStatistics.cpp ├── VideoDevice.cpp ├── VideoProcess.cpp ├── VideoProcessEnum.cpp ├── VideoReferenceDataManager.cpp └── View.cpp ================================================ FILE CONTENTS ================================================ ================================================ FILE: CMakeLists.txt ================================================ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. cmake_minimum_required(VERSION 3.14) project(d3d12translationlayer) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) include(FetchContent) FetchContent_Declare( DirectX-Headers GIT_REPOSITORY https://github.com/Microsoft/DirectX-Headers.git GIT_TAG v1.619.1 ) FetchContent_MakeAvailable(DirectX-Headers) option(USE_PIX "Enable the use of PIX markers" ON) add_subdirectory(src) if (HAS_WDK) add_subdirectory(DxbcParser) target_link_libraries(d3d12translationlayer dxbcparser) endif() ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.microsoft.com. 
When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repositories using our CLA. This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. Note that this project is used within Microsoft for projects and components which are not open source. As such, changes which would impact those projects, such as changes to function signatures which are leveraged directly, will be subject to additional scrutiny. We welcome bugfixes and additional features which will improve the quality of Microsoft products, as well as other community projects which leverage this one. ================================================ FILE: DxbcParser/CMakeLists.txt ================================================ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. cmake_minimum_required(VERSION 3.13) project(dxbcparser) add_library(dxbcparser STATIC src/BlobContainer.cpp src/DXBCUtils.cpp include/BlobContainer.h include/DXBCUtils.h include/pch.h) target_include_directories(dxbcparser PUBLIC include PUBLIC ../external) ================================================ FILE: DxbcParser/include/BlobContainer.h ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#pragma once #include #define DXBC_MAJOR_VERSION 1 #define DXBC_MINOR_VERSION 0 #define DXBC_1_0_MAX_SIZE_IN_BYTES 0x02000000 #define DXBC_MAX_SIZE_IN_BYTES 0x80000000 #define DXBC_FOURCC(ch0, ch1, ch2, ch3) \ ((UINT)(BYTE)(ch0) | ((UINT)(BYTE)(ch1) << 8) | \ ((UINT)(BYTE)(ch2) << 16) | ((UINT)(BYTE)(ch3) << 24 )) const UINT DXBC_FOURCC_NAME = DXBC_FOURCC('D','X','B','C'); typedef enum DXBCFourCC { DXBC_GenericShader = DXBC_FOURCC('S','H','D','R'), // same as SHDR, but this will fail on D3D10.x runtimes and not on D3D11+. DXBC_GenericShaderEx = DXBC_FOURCC('S','H','E','X'), DXBC_InputSignature = DXBC_FOURCC('I','S','G','N'), DXBC_InputSignature11_1 = DXBC_FOURCC('I','S','G','1'), DXBC_PatchConstantSignature = DXBC_FOURCC('P','C','S','G'), DXBC_PatchConstantSignature11_1 = DXBC_FOURCC('P','S','G','1'), DXBC_OutputSignature = DXBC_FOURCC('O','S','G','N'), DXBC_OutputSignature5 = DXBC_FOURCC('O','S','G','5'), DXBC_OutputSignature11_1 = DXBC_FOURCC('O','S','G','1'), DXBC_InterfaceData = DXBC_FOURCC('I','F','C','E'), DXBC_ShaderFeatureInfo = DXBC_FOURCC('S','F','I','0'), } DXBCFourCC; #undef DXBC_FOURCC #define DXBC_HASH_SIZE 16 typedef struct DXBCHash { unsigned char Digest[DXBC_HASH_SIZE]; } DXBCHash; typedef struct DXBCVersion { UINT16 Major; UINT16 Minor; } DXBCVersion; typedef struct DXBCHeader { UINT DXBCHeaderFourCC; DXBCHash Hash; DXBCVersion Version; UINT32 ContainerSizeInBytes; // Count from start of this header, including all blobs UINT32 BlobCount; // Structure is followed by UINT32[BlobCount] (the blob index, storing offsets from start of container in bytes // to the start of each blob's header) } DXBCHeader; const UINT32 DXBCHashStartOffset = offsetof(struct DXBCHeader,Version); // hash the container from this offset to the end. 
const UINT32 DXBCSizeOffset = offsetof(struct DXBCHeader,ContainerSizeInBytes); typedef struct DXBCBlobHeader { DXBCFourCC BlobFourCC; UINT32 BlobSize; // Byte count for BlobData // Structure is followed by BYTE[BlobSize] (the blob's data) } DXBCBlobHeader; class CDXBCParser; //================================================================================================================================= // CDXBCParser // // Parse a DXBC (DX Blob Container) that you provide as input. // // Basic usage: // (1) Call ReadDXBC() or ReadDXBCAssumingValidSize() (latter if you don't know the size, but trust the pointer) // (2) Call various Get*() commands to retrieve information about the container such as // how many blobs are in it, the hash of the container, the version #, and most importantly // retrieve all of the Blobs. You can retrieve blobs by searching for the FourCC, or // enumerate through all of them. Multiple blobs can even have the same FourCC, if you choose to // create the DXBC that way, and this parser will let you discover all of them. // (3) You can parse a new container by calling ReadDXBC() again, or just get rid of the class. // // Read comments inline below for full detail. // // (kuttas) IMPORTANT: This class should be kept lightweight and destructor-free; // other systems assume that this class is relatively small in size (less than a couple of pointers) // and that it does not need cleaning up. If this ceases to be the case, the FX10 // system will need some minor modifications to cope with this. 
// class CDXBCParser { public: CDXBCParser(); // Sets the container to be parsed, and does some // basic integrity checking, such as ensuring the version is: // Major = DXBC_MAJOR_VERSION // Minor = DXBC_MAJOR_VERSION // // Returns S_OK, or E_FAIL // // Note, if you don't know ContainerSize and are willing to // assume your pointer pContainer is valid, you can call // ReadDXBCAssumingValidSize HRESULT ReadDXBC(const void* pContainer, UINT32 ContainerSizeInBytes); // Same as ReadDXBC(), except this assumes the size field stored inside // pContainer is valid. HRESULT ReadDXBCAssumingValidSize(const void* pContainer); // returns NULL if no valid container is set const DXBCVersion* GetVersion(); // returns NULL if no valid container is set const DXBCHash* GetHash(); UINT32 GetBlobCount(); const void* GetBlob(UINT32 BlobIndex); UINT32 GetBlobSize(UINT32 BlobIndex); UINT GetBlobFourCC(UINT32 BlobIndex); // fixes up internal pointers given that the original bytecode has been moved by ByteOffset bytes HRESULT RelocateBytecode(UINT_PTR ByteOffset); #define DXBC_BLOB_NOT_FOUND -1 // Note: search INCLUDES entry at startindex // returns blob index if found, otherwise returns DXBC_BLOB_NOT_FOUND UINT32 FindNextMatchingBlob( DXBCFourCC SearchFourCC, UINT32 SearchStartBlobIndex = 0 ); private: const DXBCHeader* m_pHeader; const UINT32* m_pIndex; }; ================================================ FILE: DxbcParser/include/DXBCUtils.h ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#pragma once #include "BlobContainer.h" #include "d3d12TokenizedProgramFormat.hpp" class CSignatureParser; class CSignatureParser5; UINT32 DXBCGetSizeAssumingValidPointer(const void* pDXBC); HRESULT DXBCGetInputSignature( const void* pBlobContainer, CSignatureParser* pParserToUse, bool bForceStringReference = false ); HRESULT DXBCGetOutputSignature( const void* pBlobContainer, CSignatureParser* pParserToUse, bool bForceStringReference = false ); HRESULT DXBCGetOutputSignature( const void* pBlobContainer, CSignatureParser5* pParserToUse, bool bForceStringReference = false ); HRESULT DXBCGetPatchConstantSignature( const void* pBlobContainer, CSignatureParser* pParserToUse, bool bForceStringReference = false ); //================================================================================================================================= // // Signature Parser (not an encoder though) // //--------------------------------------------------------------------------------------------------------------------------------- typedef struct D3D11_SIGNATURE_PARAMETER { UINT Stream; char* SemanticName; UINT SemanticIndex; D3D10_SB_NAME SystemValue; // Internally defined enumeration D3D10_SB_REGISTER_COMPONENT_TYPE ComponentType; UINT Register; BYTE Mask; // (D3D10_SB_OPERAND_4_COMPONENT_MASK >> 4), meaning 4 LSBs are xyzw respectively. // The following unioned fields, NeverWrites_Mask and AlwaysReads_Mask, are exclusively used for // output signatures or input signatures, respectively. // // For an output signature, NeverWrites_Mask indicates that the shader the signature belongs to never // writes to the masked components of the output register. Meaningful bits are the ones set in Mask above. // // For an input signature, AlwaysReads_Mask indicates that the shader the signature belongs to always // reads the masked components of the input register. Meaningful bits are the ones set in the Mask above. 
// // This allows many shaders to share similar signatures even though some of them may not happen to use // all of the inputs/outputs - something which may not be obvious when authored. The NeverWrites_Mask // and AlwaysReads_Mask can be checked in a debug layer at runtime for the one interesting case: that a // shader that always reads a value is fed by a shader that always writes it. Cases where shaders may // read values or may not cannot be validated unfortunately. // // In scenarios where a signature is being passed around standalone (so it isn't tied to input or output // of a given shader), this union can be zeroed out in the absence of more information. This effectively // forces off linkage validation errors with the signature, since if interpreted as a input or output signature // somehow, since the meaning on output would be "everything is always written" and on input it would be // "nothing is always read". union { BYTE NeverWrites_Mask; // For an output signature, the shader the signature belongs to never // writes the masked components of the output register. BYTE AlwaysReads_Mask; // For an input signature, the shader the signature belongs to always // reads the masked components of the input register. 
}; D3D_MIN_PRECISION MinPrecision; } D3D11_SIGNATURE_PARAMETER; class CSignatureParser { friend class CSignatureParser5; public: CSignatureParser() { Init(); } ~CSignatureParser() { Cleanup(); } HRESULT ReadSignature11_1( D3D11_SIGNATURE_PARAMETER* pParamList, UINT* pCharSums, UINT NumParameters ); // returns S_OK or E_FAIL HRESULT ReadSignature11_1( __in_bcount(BlobSize) const void* pSignature, UINT BlobSize, bool bForceStringReference = false ); // returns S_OK or E_FAIL HRESULT ReadSignature4( __in_bcount(BlobSize) const void* pSignature, UINT BlobSize, bool bForceStringReference = false ); // returns S_OK or E_FAIL HRESULT ReadSignature5( D3D11_SIGNATURE_PARAMETER* pParamList, UINT* pCharSums, UINT NumParameters ); // returns S_OK or E_FAIL UINT GetParameters( D3D11_SIGNATURE_PARAMETER const** ppParameters ) const; // Returns element count and array of parameters. Returned memory is // deleted when the class is destroyed or ReadSignature() is called again. UINT GetNumParameters() const {return m_cParameters;} HRESULT FindParameter( LPCSTR SemanticName, UINT SemanticIndex, D3D11_SIGNATURE_PARAMETER** pFoundParameter) const; // Returned memory is deleted when the class is destroyed or ReadSignature is called again. // Returns S_OK if found, E_FAIL if not found, E_OUTOFMEMORY if out of memory HRESULT FindParameterRegister( LPCSTR SemanticName, UINT SemanticIndex, UINT* pFoundParameterRegister); // Returns S_OK if found, E_FAIL if not found, E_OUTOFMEMORY if out of memory UINT GetSemanticNameCharSum( UINT parameter ); // Get the character sum for the name of a parameter to speed comparisons. bool CanOutputTo( CSignatureParser* pTargetSignature ); // (1) Target signature must be identical to beginning of source signature (source signature can have // more entries at the end) // (2) CanOutputTo also accounts for if the target always reads some of the values but the source never // writes them. 
This can arise when signatures are being reused with many shaders, but some don't // use all the inputs/outputs (the signatures still remain intact). void ClearAlwaysReadsNeverWritesMask(); // Clear out AlwaysReads_Mask / NeverWrites_Mask for all elements in the // signature to force the signature not to cause linkage errors when // passing the signature around after extracting it from a shader. private: void Init(); void Cleanup(); D3D11_SIGNATURE_PARAMETER* m_pSignatureParameters; UINT* m_pSemanticNameCharSums; UINT m_cParameters; BOOL m_OwnParameters; }; class CSignatureParser5 { public: CSignatureParser5() { Init(); } ~CSignatureParser5() { Cleanup(); } HRESULT ReadSignature11_1( __in_bcount(BlobSize) const void* pSignature, UINT BlobSize, bool bForceStringReference = false ); // returns S_OK or E_FAIL HRESULT ReadSignature5( __in_bcount(BlobSize) const void* pSignature, UINT BlobSize, bool bForceStringReference = false ); // returns S_OK or E_FAIL HRESULT ReadSignature4( __in_bcount(BlobSize) const void* pSignature, UINT BlobSize, bool bForceStringReference = false ); // returns S_OK or E_FAIL UINT NumStreams() { return m_NumSigs; } UINT GetTotalParameters() const {return m_cParameters;} const CSignatureParser* Signature( UINT stream ) const { return &m_Sig[stream]; } const CSignatureParser* RastSignature() const { return ( m_RasterizedStream < D3D11_SO_STREAM_COUNT ) ? &m_Sig[m_RasterizedStream] : NULL; } void SetRasterizedStream( UINT stream ) { m_RasterizedStream = stream; } UINT RasterizedStream() { return m_RasterizedStream; }; private: void Init(); void Cleanup(); D3D11_SIGNATURE_PARAMETER* m_pSignatureParameters; UINT* m_pSemanticNameCharSums; UINT m_cParameters; CSignatureParser m_Sig[D3D11_SO_STREAM_COUNT]; UINT m_NumSigs; UINT m_RasterizedStream; }; //================================================================================================================================= // // Shader Feature Info blob // // Structure: // A SShaderFeatureInfo. 
//--------------------------------------------------------------------------------------------------------------------------------- #define SHADER_FEATURE_DOUBLES 0x0001 #define SHADER_FEATURE_COMPUTE_SHADERS_PLUS_RAW_AND_STRUCTURED_BUFFERS_VIA_SHADER_4_X 0x0002 #define SHADER_FEATURE_UAVS_AT_EVERY_STAGE 0x0004 #define SHADER_FEATURE_64_UAVS 0x0008 #define SHADER_FEATURE_MINIMUM_PRECISION 0x0010 #define SHADER_FEATURE_11_1_DOUBLE_EXTENSIONS 0x0020 #define SHADER_FEATURE_11_1_SHADER_EXTENSIONS 0x0040 #define SHADER_FEATURE_LEVEL_9_COMPARISON_FILTERING 0x0080 #define SHADER_FEATURE_TILED_RESOURCES 0x0100 #define SHADER_FEATURE_STENCIL_REF 0x0200 #define SHADER_FEATURE_INNER_COVERAGE 0x0400 #define SHADER_FEATURE_TYPED_UAV_LOAD_ADDITIONAL_FORMATS 0x0800 #define SHADER_FEATURE_ROVS 0x1000 #define SHADER_FEATURE_VIEWPORT_AND_RT_ARRAY_INDEX_FROM_ANY_SHADER_FEEDING_RASTERIZER 0x2000 // The bitfield below defines a set of optional bits for future use at the top end. If a bit is set that is not // in the optional range, the runtime fill fail shader creation. #define D3D11_OPTIONAL_FEATURE_FLAGS 0x7FFFFF0000000000 struct SShaderFeatureInfo { UINT64 FeatureFlags; }; ================================================ FILE: DxbcParser/include/pch.h ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once #include #include ================================================ FILE: DxbcParser/src/BlobContainer.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#include "pch.h"
// NOTE(review): the header name of the following #include is missing from this copy of the
// file (the angle-bracket text was lost). Presumably it is the header that declares
// CDXBCParser and DXBCGetSizeAssumingValidPointer -- restore it from the repository.
#include

//=================================================================================================================================
// CDXBCParser

// Returns the header of the blob that index entry BlobIndex points at.
// The caller must pass a container that ReadDXBC() accepted and BlobIndex < BlobCount.
static const DXBCBlobHeader* BlobHeaderAt( const DXBCHeader* pHeader, const UINT32* pIndex, UINT32 BlobIndex )
{
    return (const DXBCBlobHeader*)((const BYTE*)pHeader + pIndex[BlobIndex]);
}

//---------------------------------------------------------------------------------------------------------------------------------
// CDXBCParser::CDXBCParser
CDXBCParser::CDXBCParser()
{
    m_pHeader = NULL;
    m_pIndex = NULL;
}

//---------------------------------------------------------------------------------------------------------------------------------
// CDXBCParser::ReadDXBC()
//
// Validates the container at pContainer and, on success, remembers it for the Get*() calls.
//
// Checks performed:
//   - pContainer is non-NULL and ContainerSizeInBytes covers at least a DXBCHeader
//   - the size stored in the header equals ContainerSizeInBytes
//   - FourCC and version match DXBC_FOURCC_NAME / DXBC_MAJOR_VERSION / DXBC_MINOR_VERSION
//   - the blob index lies entirely inside the container
//   - every blob starts exactly where the previous segment (index or blob) ended, and its
//     header and data lie entirely inside the container
//
// Returns S_OK when every check passes, E_FAIL otherwise. On failure any previously set
// container is left untouched.
HRESULT CDXBCParser::ReadDXBC( const void* pContainer, UINT ContainerSizeInBytes )
{
    if (!pContainer)
    {
        return E_FAIL;
    }
    if (ContainerSizeInBytes < sizeof( DXBCHeader ))
    {
        return E_FAIL;
    }

    const DXBCHeader* pHeader = (const DXBCHeader*)pContainer;
    if (pHeader->ContainerSizeInBytes != ContainerSizeInBytes)
    {
        return E_FAIL;
    }
    if ((pHeader->DXBCHeaderFourCC != DXBC_FOURCC_NAME) ||
        (pHeader->Version.Major != DXBC_MAJOR_VERSION) ||
        (pHeader->Version.Minor != DXBC_MINOR_VERSION))
    {
        return E_FAIL;
    }

    // Reject a container whose end would wrap the address space. The test is done on integers
    // because overflowing pointer arithmetic is undefined and may be optimized away.
    const UINT_PTR ContainerAddress = (UINT_PTR)pContainer;
    if (ContainerAddress + ContainerSizeInBytes < ContainerAddress)
    {
        return E_FAIL;
    }

    const BYTE* pBytes = (const BYTE*)pContainer;
    const UINT32* pIndex = (const UINT32*)(pBytes + sizeof( DXBCHeader ));

    // All offsets below are exclusive segment ends held in 64 bits. The largest possible value
    // (header + 4 * 0xFFFFFFFF, or offset + header + 0xFFFFFFFF) fits easily, so nothing can
    // wrap around or be truncated into a small, valid-looking offset.
    const UINT64 ContainerSize = ContainerSizeInBytes;

    // Is the entire index within the container?
    UINT64 OffsetOfSegmentEnd = sizeof( DXBCHeader ) + (UINT64)sizeof( UINT32 ) * pHeader->BlobCount;
    if (OffsetOfSegmentEnd > ContainerSize)
    {
        return E_FAIL;
    }

    // Is each blob directly after the previous segment and not past the end of the container?
    for (UINT32 b = 0; b < pHeader->BlobCount; b++)
    {
        const UINT64 BlobOffset = pIndex[b];
        if (BlobOffset != OffsetOfSegmentEnd)
        {
            return E_FAIL; // gap or overlap
        }

        const UINT64 BlobDataOffset = BlobOffset + sizeof( DXBCBlobHeader );
        if (BlobDataOffset > ContainerSize)
        {
            return E_FAIL; // blob header does not fit
        }

        // The header is known to be inside the container, so it is safe to read BlobSize.
        const DXBCBlobHeader* pBlobHeader = (const DXBCBlobHeader*)(pBytes + pIndex[b]);
        OffsetOfSegmentEnd = BlobDataOffset + pBlobHeader->BlobSize;
        if (OffsetOfSegmentEnd > ContainerSize)
        {
            return E_FAIL; // blob data does not fit
        }
    }

    // Ok, satisfied with integrity of container, store info.
    m_pHeader = pHeader;
    m_pIndex = pIndex;
    return S_OK;
}

//---------------------------------------------------------------------------------------------------------------------------------
// CDXBCParser::ReadDXBCAssumingValidSize()
//
// Same as ReadDXBC(), using the size stored in the container's own header. The memory at
// pContainer is trusted to be readable for that many bytes.
HRESULT CDXBCParser::ReadDXBCAssumingValidSize( const void* pContainer )
{
    if (!pContainer)
    {
        return E_FAIL;
    }
    return ReadDXBC( (const BYTE*)pContainer, DXBCGetSizeAssumingValidPointer( (const BYTE*)pContainer ) );
}

//---------------------------------------------------------------------------------------------------------------------------------
// CDXBCParser::FindNextMatchingBlob
//
// Returns the index of the first blob at or after SearchStartBlobIndex whose FourCC equals
// SearchFourCC, or (UINT)DXBC_BLOB_NOT_FOUND when there is none or no container is set.
UINT CDXBCParser::FindNextMatchingBlob( DXBCFourCC SearchFourCC, UINT SearchStartBlobIndex )
{
    if (!m_pHeader || !m_pIndex)
    {
        return (UINT)DXBC_BLOB_NOT_FOUND;
    }
    for (UINT b = SearchStartBlobIndex; b < m_pHeader->BlobCount; b++)
    {
        if (BlobHeaderAt( m_pHeader, m_pIndex, b )->BlobFourCC == SearchFourCC)
        {
            return b;
        }
    }
    return (UINT)DXBC_BLOB_NOT_FOUND;
}

//---------------------------------------------------------------------------------------------------------------------------------
// CDXBCParser::GetVersion()
// Returns the container version, or NULL when no container is set.
const DXBCVersion* CDXBCParser::GetVersion()
{
    return m_pHeader ? &m_pHeader->Version : NULL;
}

//---------------------------------------------------------------------------------------------------------------------------------
// CDXBCParser::GetHash()
// Returns the container hash, or NULL when no container is set.
const DXBCHash* CDXBCParser::GetHash()
{
    return m_pHeader ? &m_pHeader->Hash : NULL;
}

//---------------------------------------------------------------------------------------------------------------------------------
// CDXBCParser::GetBlobCount()
// Returns the number of blobs, or 0 when no container is set.
UINT CDXBCParser::GetBlobCount()
{
    return m_pHeader ? m_pHeader->BlobCount : 0;
}

//---------------------------------------------------------------------------------------------------------------------------------
// CDXBCParser::GetBlob()
// Returns a pointer to the blob's data (just past its DXBCBlobHeader), or NULL when no
// container is set or BlobIndex is out of range.
const void* CDXBCParser::GetBlob( UINT BlobIndex )
{
    if (!m_pHeader || !m_pIndex || m_pHeader->BlobCount <= BlobIndex)
    {
        return NULL;
    }
    return (const BYTE*)BlobHeaderAt( m_pHeader, m_pIndex, BlobIndex ) + sizeof( DXBCBlobHeader );
}

//---------------------------------------------------------------------------------------------------------------------------------
// CDXBCParser::GetBlobSize()
// Returns the blob's data size in bytes, or 0 when no container is set or BlobIndex is out of range.
UINT CDXBCParser::GetBlobSize( UINT BlobIndex )
{
    if (!m_pHeader || !m_pIndex || m_pHeader->BlobCount <= BlobIndex)
    {
        return 0;
    }
    return BlobHeaderAt( m_pHeader, m_pIndex, BlobIndex )->BlobSize;
}

//---------------------------------------------------------------------------------------------------------------------------------
// CDXBCParser::GetBlobFourCC()
// Returns the blob's FourCC, or 0 when no container is set or BlobIndex is out of range.
UINT CDXBCParser::GetBlobFourCC( UINT BlobIndex )
{
    if (!m_pHeader || !m_pIndex || m_pHeader->BlobCount <= BlobIndex)
    {
        return 0;
    }
    return BlobHeaderAt( m_pHeader, m_pIndex, BlobIndex )->BlobFourCC;
}
//--------------------------------------------------------------------------------------------------------------------------------- // CDXBCParser::RelocateBytecode() HRESULT CDXBCParser::RelocateBytecode( UINT_PTR ByteOffset ) { if (!m_pHeader || !m_pIndex) { // bad -- has not been initialized yet return E_FAIL; } m_pHeader = (const DXBCHeader*)((const BYTE*)m_pHeader + ByteOffset); m_pIndex = (const UINT32*)((const BYTE*)m_pIndex + ByteOffset); return S_OK; } ================================================ FILE: DxbcParser/src/DXBCUtils.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #include "pch.h" #include //--------------------------------------------------------------------------------------------------------------------------------- // DXBCGetSizeAssumingValidPointer() UINT DXBCGetSizeAssumingValidPointer(const void* pDXBC) { if( !pDXBC ) return 0; return *(UINT*)((const BYTE*)pDXBC + DXBCSizeOffset); } //--------------------------------------------------------------------------------------------------------------------------------- // DXBCGetInputSignature() HRESULT DXBCGetInputSignature( const void* pBlobContainer, CSignatureParser* pParserToUse, bool bForceStringReference ) { CDXBCParser DXBCParser; HRESULT hr = S_OK; if( FAILED(hr = DXBCParser.ReadDXBCAssumingValidSize(pBlobContainer) ) ) { return hr; } UINT BlobIndex = DXBCParser.FindNextMatchingBlob(DXBC_InputSignature11_1, 0); if( BlobIndex != DXBC_BLOB_NOT_FOUND ) { const void* pUnParsedSignature = DXBCParser.GetBlob(BlobIndex); UINT BlobSize = DXBCParser.GetBlobSize(BlobIndex); if( !BlobSize ) { return E_FAIL; } assert(pUnParsedSignature); return pParserToUse->ReadSignature11_1(pUnParsedSignature, BlobSize, bForceStringReference); } BlobIndex = DXBCParser.FindNextMatchingBlob(DXBC_InputSignature, 0); if( BlobIndex != DXBC_BLOB_NOT_FOUND ) { const void* pUnParsedSignature = DXBCParser.GetBlob(BlobIndex); UINT BlobSize = 
DXBCParser.GetBlobSize(BlobIndex); if( !BlobSize ) { return E_FAIL; } assert(pUnParsedSignature); return pParserToUse->ReadSignature4(pUnParsedSignature, BlobSize, bForceStringReference); } return E_FAIL; } //--------------------------------------------------------------------------------------------------------------------------------- // DXBCGetOutputSignature() HRESULT DXBCGetOutputSignature( const void* pBlobContainer, CSignatureParser5* pParserToUse, bool bForceStringReference ) { CDXBCParser DXBCParser; HRESULT hr = S_OK; if( FAILED(hr = DXBCParser.ReadDXBCAssumingValidSize(pBlobContainer) ) ) { return hr; } UINT BlobIndex = DXBCParser.FindNextMatchingBlob( DXBC_OutputSignature11_1, 0 ); if( BlobIndex != DXBC_BLOB_NOT_FOUND ) { const void* pUnParsedSignature = DXBCParser.GetBlob(BlobIndex); UINT BlobSize = DXBCParser.GetBlobSize(BlobIndex); if( !BlobSize ) { return E_FAIL; } assert(pUnParsedSignature); return pParserToUse->ReadSignature11_1(pUnParsedSignature, BlobSize, bForceStringReference); } BlobIndex = DXBCParser.FindNextMatchingBlob( DXBC_OutputSignature, 0 ); if( BlobIndex != DXBC_BLOB_NOT_FOUND ) { const void* pUnParsedSignature = DXBCParser.GetBlob(BlobIndex); UINT BlobSize = DXBCParser.GetBlobSize(BlobIndex); if( !BlobSize ) { return E_FAIL; } assert(pUnParsedSignature); return pParserToUse->ReadSignature4(pUnParsedSignature, BlobSize, bForceStringReference); } BlobIndex = DXBCParser.FindNextMatchingBlob( DXBC_OutputSignature5, 0 ); if( BlobIndex != DXBC_BLOB_NOT_FOUND ) { const void* pUnParsedSignature = DXBCParser.GetBlob(BlobIndex); UINT BlobSize = DXBCParser.GetBlobSize(BlobIndex); if( !BlobSize ) { return E_FAIL; } assert(pUnParsedSignature); return pParserToUse->ReadSignature5(pUnParsedSignature, BlobSize, bForceStringReference); } return E_FAIL; } HRESULT DXBCGetOutputSignature( const void* pBlobContainer, CSignatureParser* pParserToUse, bool bForceStringReference) { CDXBCParser DXBCParser; HRESULT hr = S_OK; if( FAILED(hr = 
DXBCParser.ReadDXBCAssumingValidSize(pBlobContainer) ) ) { return hr; } UINT BlobIndex = DXBCParser.FindNextMatchingBlob( DXBC_OutputSignature11_1, 0 ); if( BlobIndex != DXBC_BLOB_NOT_FOUND ) { const void* pUnParsedSignature = DXBCParser.GetBlob(BlobIndex); UINT BlobSize = DXBCParser.GetBlobSize(BlobIndex); if( !BlobSize ) { return E_FAIL; } assert(pUnParsedSignature); return pParserToUse->ReadSignature11_1(pUnParsedSignature, BlobSize, bForceStringReference); } BlobIndex = DXBCParser.FindNextMatchingBlob( DXBC_OutputSignature, 0 ); if( BlobIndex != DXBC_BLOB_NOT_FOUND ) { const void* pUnParsedSignature = DXBCParser.GetBlob(BlobIndex); UINT BlobSize = DXBCParser.GetBlobSize(BlobIndex); if( !BlobSize ) { return E_FAIL; } assert(pUnParsedSignature); return pParserToUse->ReadSignature4(pUnParsedSignature, BlobSize, bForceStringReference); } return E_FAIL; } HRESULT DXBCGetPatchConstantSignature( const void* pBlobContainer, CSignatureParser* pParserToUse, bool bForceStringReference) { CDXBCParser DXBCParser; HRESULT hr = S_OK; if( FAILED(hr = DXBCParser.ReadDXBCAssumingValidSize(pBlobContainer) ) ) { return hr; } UINT BlobIndex = DXBCParser.FindNextMatchingBlob( DXBC_PatchConstantSignature11_1, 0 ); if( BlobIndex != DXBC_BLOB_NOT_FOUND ) { const void* pUnParsedSignature = DXBCParser.GetBlob(BlobIndex); UINT BlobSize = DXBCParser.GetBlobSize(BlobIndex); if( !BlobSize ) { return E_FAIL; } assert(pUnParsedSignature); return pParserToUse->ReadSignature11_1(pUnParsedSignature, BlobSize, bForceStringReference); } BlobIndex = DXBCParser.FindNextMatchingBlob( DXBC_PatchConstantSignature, 0 ); if( BlobIndex != DXBC_BLOB_NOT_FOUND ) { const void* pUnParsedSignature = DXBCParser.GetBlob(BlobIndex); UINT BlobSize = DXBCParser.GetBlobSize(BlobIndex); if( !BlobSize ) { return E_FAIL; } assert(pUnParsedSignature); return pParserToUse->ReadSignature4(pUnParsedSignature, BlobSize, bForceStringReference); } return E_FAIL; } 
//================================================================================================================================= // CSignatureParser //--------------------------------------------------------------------------------------------------------------------------------- // CSignatureParser::Init() void CSignatureParser::Init() { m_pSignatureParameters = NULL; m_pSemanticNameCharSums = NULL; m_cParameters = 0; m_OwnParameters = TRUE; } //--------------------------------------------------------------------------------------------------------------------------------- // CSignatureParser::Cleanup() void CSignatureParser::Cleanup() { if( m_pSignatureParameters ) { if( m_OwnParameters ) { free(m_pSignatureParameters); } m_pSignatureParameters = NULL; } m_pSemanticNameCharSums = NULL; m_cParameters = 0; m_OwnParameters = TRUE; } //--------------------------------------------------------------------------------------------------------------------------------- // BoundedStringLength // // Safely returns length of null-terminated string pointed to by pBegin (not including null) // given that the memory [ pBegin, pEnd ) is readable. // // Returns an error if pBegin doesn't point to a valid NT string. // HRESULT BoundedStringLength(__in_ecount(pEnd-pBegin) const char *pBegin, const char *pEnd, __out_ecount(1) UINT *pLength) { if (pBegin >= pEnd) { // No readable memory! 
return E_FAIL; } const char *pc = pBegin; while (pc < pEnd && *pc) { ++pc; } if (pc == pEnd) { return E_FAIL; } assert(!*pc); *pLength = (UINT)(pc-pBegin); return S_OK; } typedef struct _D3D10_INTERNALSHADER_SIGNATURE { UINT Parameters; // Number of parameters UINT ParameterInfo; // Offset to D3D10_INTERNALSHADER_PARAMETER[Parameters] } D3D10_INTERNALSHADER_SIGNATURE, *LPD3D10_INTERNALSHADER_SIGNATURE; typedef struct _D3D10_INTERNALSHADER_PARAMETER { UINT SemanticName; // Offset to LPCSTR UINT SemanticIndex; // Semantic Index D3D10_NAME SystemValue; // Internally defined enumeration D3D10_REGISTER_COMPONENT_TYPE ComponentType; // Type of of bits UINT Register; // Register Index BYTE Mask; // Combination of D3D10_COMPONENT_MASK values // The following unioned fields, NeverWrites_Mask and AlwaysReads_Mask, are exclusively used for // output signatures or input signatures, respectively. // // For an output signature, NeverWrites_Mask indicates that the shader the signature belongs to never // writes to the masked components of the output register. Meaningful bits are the ones set in Mask above. // // For an input signature, AlwaysReads_Mask indicates that the shader the signature belongs to always // reads the masked components of the input register. Meaningful bits are the ones set in the Mask above. // // This allows many shaders to share similar signatures even though some of them may not happen to use // all of the inputs/outputs - something which may not be obvious when authored. The NeverWrites_Mask // and AlwaysReads_Mask can be checked in a debug layer at runtime for the one interesting case: that a // shader that always reads a value is fed by a shader that always writes it. Cases where shaders may // read values or may not cannot be validated unfortunately. // // In scenarios where a signature is being passed around standalone (so it isn't tied to input or output // of a given shader), this union can be zeroed out in the absence of more information. 
This effectively // forces off linkage validation errors with the signature, since if interpreted as a input or output signature // somehow, since the meaning on output would be "everything is always written" and on input it would be // "nothing is always read". union { BYTE NeverWrites_Mask; // For an output signature, the shader the signature belongs to never // writes the masked components of the output register. BYTE AlwaysReads_Mask; // For an input signature, the shader the signature belongs to always // reads the masked components of the input register. }; } D3D10_INTERNALSHADER_PARAMETER, *LPD3D10_INTERNALSHADER_PARAMETER; typedef struct _D3D11_INTERNALSHADER_PARAMETER_11_1 { UINT Stream; // Stream index (parameters must appear in non-decreasing stream order) UINT SemanticName; // Offset to LPCSTR UINT SemanticIndex; // Semantic Index D3D10_NAME SystemValue; // Internally defined enumeration D3D10_REGISTER_COMPONENT_TYPE ComponentType; // Type of of bits UINT Register; // Register Index BYTE Mask; // Combination of D3D10_COMPONENT_MASK values // The following unioned fields, NeverWrites_Mask and AlwaysReads_Mask, are exclusively used for // output signatures or input signatures, respectively. // // For an output signature, NeverWrites_Mask indicates that the shader the signature belongs to never // writes to the masked components of the output register. Meaningful bits are the ones set in Mask above. // // For an input signature, AlwaysReads_Mask indicates that the shader the signature belongs to always // reads the masked components of the input register. Meaningful bits are the ones set in the Mask above. // // This allows many shaders to share similar signatures even though some of them may not happen to use // all of the inputs/outputs - something which may not be obvious when authored. 
The NeverWrites_Mask // and AlwaysReads_Mask can be checked in a debug layer at runtime for the one interesting case: that a // shader that always reads a value is fed by a shader that always writes it. Cases where shaders may // read values or may not cannot be validated unfortunately. // // In scenarios where a signature is being passed around standalone (so it isn't tied to input or output // of a given shader), this union can be zeroed out in the absence of more information. This effectively // forces off linkage validation errors with the signature, since if interpreted as a input or output signature // somehow, since the meaning on output would be "everything is always written" and on input it would be // "nothing is always read". union { BYTE NeverWrites_Mask; // For an output signature, the shader the signature belongs to never // writes the masked components of the output register. BYTE AlwaysReads_Mask; // For an input signature, the shader the signature belongs to always // reads the masked components of the input register. }; D3D_MIN_PRECISION MinPrecision; // Minimum precision of input/output data } D3D11_INTERNALSHADER_PARAMETER_11_1, *LPD3D11_INTERNALSHADER_PARAMETER_11_1; typedef struct _D3D11_INTERNALSHADER_PARAMETER_FOR_GS { UINT Stream; // Stream index (parameters must appear in non-decreasing stream order) UINT SemanticName; // Offset to LPCSTR UINT SemanticIndex; // Semantic Index D3D10_NAME SystemValue; // Internally defined enumeration D3D10_REGISTER_COMPONENT_TYPE ComponentType; // Type of of bits UINT Register; // Register Index BYTE Mask; // Combination of D3D10_COMPONENT_MASK values // The following unioned fields, NeverWrites_Mask and AlwaysReads_Mask, are exclusively used for // output signatures or input signatures, respectively. // // For an output signature, NeverWrites_Mask indicates that the shader the signature belongs to never // writes to the masked components of the output register. 
Meaningful bits are the ones set in Mask above. // // For an input signature, AlwaysReads_Mask indicates that the shader the signature belongs to always // reads the masked components of the input register. Meaningful bits are the ones set in the Mask above. // // This allows many shaders to share similar signatures even though some of them may not happen to use // all of the inputs/outputs - something which may not be obvious when authored. The NeverWrites_Mask // and AlwaysReads_Mask can be checked in a debug layer at runtime for the one interesting case: that a // shader that always reads a value is fed by a shader that always writes it. Cases where shaders may // read values or may not cannot be validated unfortunately. // // In scenarios where a signature is being passed around standalone (so it isn't tied to input or output // of a given shader), this union can be zeroed out in the absence of more information. This effectively // forces off linkage validation errors with the signature, since if interpreted as a input or output signature // somehow, since the meaning on output would be "everything is always written" and on input it would be // "nothing is always read". union { BYTE NeverWrites_Mask; // For an output signature, the shader the signature belongs to never // writes the masked components of the output register. BYTE AlwaysReads_Mask; // For an input signature, the shader the signature belongs to always // reads the masked components of the input register. 
}; } D3D11_INTERNALSHADER_PARAMETER_FOR_GS, *LPD3D11_INTERNALSHADER_PARAMETER_FOR_GS; inline D3D10_SB_NAME ConvertToSB(D3D10_NAME Value, UINT SemanticIndex) { switch (Value) { case D3D10_NAME_TARGET: case D3D10_NAME_DEPTH: case D3D10_NAME_COVERAGE: case D3D10_NAME_UNDEFINED: return D3D10_SB_NAME_UNDEFINED; case D3D10_NAME_POSITION: return D3D10_SB_NAME_POSITION; case D3D10_NAME_CLIP_DISTANCE: return D3D10_SB_NAME_CLIP_DISTANCE; case D3D10_NAME_CULL_DISTANCE: return D3D10_SB_NAME_CULL_DISTANCE; case D3D10_NAME_RENDER_TARGET_ARRAY_INDEX: return D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX; case D3D10_NAME_VIEWPORT_ARRAY_INDEX: return D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX; case D3D10_NAME_VERTEX_ID: return D3D10_SB_NAME_VERTEX_ID; case D3D10_NAME_PRIMITIVE_ID: return D3D10_SB_NAME_PRIMITIVE_ID; case D3D10_NAME_INSTANCE_ID: return D3D10_SB_NAME_INSTANCE_ID; case D3D10_NAME_IS_FRONT_FACE: return D3D10_SB_NAME_IS_FRONT_FACE; case D3D10_NAME_SAMPLE_INDEX: return D3D10_SB_NAME_SAMPLE_INDEX; case D3D11_NAME_FINAL_QUAD_EDGE_TESSFACTOR: switch (SemanticIndex) { case 0: return D3D11_SB_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR; case 1: return D3D11_SB_NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR; case 2: return D3D11_SB_NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR; case 3: return D3D11_SB_NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR; } assert("Invalid D3D10_NAME"); break; case D3D11_NAME_FINAL_QUAD_INSIDE_TESSFACTOR: switch (SemanticIndex) { case 0: return D3D11_SB_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR; case 1: return D3D11_SB_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR; } assert("Invalid D3D10_NAME"); break; case D3D11_NAME_FINAL_TRI_EDGE_TESSFACTOR: switch (SemanticIndex) { case 0: return D3D11_SB_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR; case 1: return D3D11_SB_NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR; case 2: return D3D11_SB_NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR; } assert("Invalid D3D10_NAME"); break; case D3D11_NAME_FINAL_TRI_INSIDE_TESSFACTOR: switch (SemanticIndex) { case 0: return 
D3D11_SB_NAME_FINAL_TRI_INSIDE_TESSFACTOR; } assert("Invalid D3D10_NAME"); break; case D3D11_NAME_FINAL_LINE_DETAIL_TESSFACTOR: switch (SemanticIndex) { case 1: return D3D11_SB_NAME_FINAL_LINE_DETAIL_TESSFACTOR; } assert("Invalid D3D10_NAME"); break; case D3D11_NAME_FINAL_LINE_DENSITY_TESSFACTOR: switch (SemanticIndex) { case 0: return D3D11_SB_NAME_FINAL_LINE_DENSITY_TESSFACTOR; } assert("Invalid D3D10_NAME"); break; default: assert("Invalid D3D10_NAME"); } // in retail the assert won't get hit and we'll just pass through anything bad. return (D3D10_SB_NAME)Value; } inline D3D10_SB_RESOURCE_DIMENSION ConvertToSB(D3D11_SRV_DIMENSION Value) { switch (Value) { case D3D11_SRV_DIMENSION_UNKNOWN: return D3D10_SB_RESOURCE_DIMENSION_UNKNOWN; case D3D11_SRV_DIMENSION_BUFFER: case D3D11_SRV_DIMENSION_BUFFEREX: return D3D10_SB_RESOURCE_DIMENSION_BUFFER; case D3D11_SRV_DIMENSION_TEXTURE1D: return D3D10_SB_RESOURCE_DIMENSION_TEXTURE1D; case D3D11_SRV_DIMENSION_TEXTURE2D: return D3D10_SB_RESOURCE_DIMENSION_TEXTURE2D; case D3D11_SRV_DIMENSION_TEXTURE2DMS: return D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMS; case D3D11_SRV_DIMENSION_TEXTURE3D: return D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D; case D3D11_SRV_DIMENSION_TEXTURECUBE: return D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE; case D3D11_SRV_DIMENSION_TEXTURECUBEARRAY: return D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBEARRAY; case D3D11_SRV_DIMENSION_TEXTURE1DARRAY: return D3D10_SB_RESOURCE_DIMENSION_TEXTURE1DARRAY; case D3D11_SRV_DIMENSION_TEXTURE2DARRAY: return D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY; case D3D11_SRV_DIMENSION_TEXTURE2DMSARRAY: return D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMSARRAY; default: assert("Invalid D3D11_RESOURCE_DIMENSION"); } // in retail the assert won't get hit and we'll just pass through anything bad. 
return (D3D10_SB_RESOURCE_DIMENSION)Value; } inline D3D10_SB_RESOURCE_DIMENSION ConvertToSB(D3D11_UAV_DIMENSION Value) { switch (Value) { case D3D11_UAV_DIMENSION_UNKNOWN: return D3D10_SB_RESOURCE_DIMENSION_UNKNOWN; case D3D11_UAV_DIMENSION_BUFFER: return D3D10_SB_RESOURCE_DIMENSION_BUFFER; case D3D11_UAV_DIMENSION_TEXTURE1D: return D3D10_SB_RESOURCE_DIMENSION_TEXTURE1D; case D3D11_UAV_DIMENSION_TEXTURE2D: return D3D10_SB_RESOURCE_DIMENSION_TEXTURE2D; case D3D11_UAV_DIMENSION_TEXTURE3D: return D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D; case D3D11_UAV_DIMENSION_TEXTURE1DARRAY: return D3D10_SB_RESOURCE_DIMENSION_TEXTURE1DARRAY; case D3D11_UAV_DIMENSION_TEXTURE2DARRAY: return D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY; default: assert("Invalid D3D11_RESOURCE_DIMENSION"); } // in retail the assert won't get hit and we'll just pass through anything bad. return (D3D10_SB_RESOURCE_DIMENSION)Value; } inline D3D10_SB_REGISTER_COMPONENT_TYPE ConvertToSB(D3D10_REGISTER_COMPONENT_TYPE Value) { switch (Value) { case D3D10_REGISTER_COMPONENT_UNKNOWN: return D3D10_SB_REGISTER_COMPONENT_UNKNOWN; case D3D10_REGISTER_COMPONENT_UINT32: return D3D10_SB_REGISTER_COMPONENT_UINT32; case D3D10_REGISTER_COMPONENT_SINT32: return D3D10_SB_REGISTER_COMPONENT_SINT32; case D3D10_REGISTER_COMPONENT_FLOAT32: return D3D10_SB_REGISTER_COMPONENT_FLOAT32; default: assert("Invalid D3D10_REGISTER_COMPONENT_TYPE"); } // in retail the assert won't get hit and we'll just pass through anything bad. 
return (D3D10_SB_REGISTER_COMPONENT_TYPE)Value; } //--------------------------------------------------------------------------------------------------------------------------------- // CSignatureParser::ReadSignature() HRESULT CSignatureParser::ReadSignature11_1( __in_bcount(BlobSize) const void* pSignature, UINT BlobSize, bool bForceStringReference ) { if( m_cParameters ) { Cleanup(); } if( !pSignature || BlobSize < sizeof(D3D10_INTERNALSHADER_SIGNATURE) ) { return E_FAIL; } D3D10_INTERNALSHADER_SIGNATURE* pHeader = (D3D10_INTERNALSHADER_SIGNATURE*)pSignature; if( pHeader->Parameters == 0 ) { m_cParameters = 0; return S_OK; } // If parameter count is less than MaxParameters calculation of SignatureAndParameterInfoSize will not overflow. const UINT MaxParameters = ((UINT_MAX - sizeof(D3D10_INTERNALSHADER_SIGNATURE)) / sizeof(D3D10_INTERNALSHADER_PARAMETER)); if (pHeader->Parameters > MaxParameters) { return E_FAIL; } UINT ParameterInfoSize = pHeader->Parameters * sizeof(D3D11_INTERNALSHADER_PARAMETER_11_1); UINT SignatureAndParameterInfoSize = ParameterInfoSize + sizeof(D3D10_INTERNALSHADER_SIGNATURE); if (BlobSize < SignatureAndParameterInfoSize) { return E_FAIL; } // Keep end pointer around for checking const void *pEnd = static_cast(pSignature) + BlobSize; const D3D11_INTERNALSHADER_PARAMETER_11_1* pParameterInfo = (D3D11_INTERNALSHADER_PARAMETER_11_1*)((const BYTE*)pSignature + pHeader->ParameterInfo); UINT cParameters = pHeader->Parameters; UINT TotalStringLength = 0; UINT LastRegister = 0; for( UINT i = 0; i < cParameters; i++ ) { UINT StringLength; if (FAILED(BoundedStringLength((const char*)((const BYTE*)pSignature + pParameterInfo[i].SemanticName), (const char*) pEnd, &StringLength))) { return E_FAIL; } if (!bForceStringReference) { TotalStringLength += StringLength + 1; } if( i > 0 ) { // registers must show up in nondecreasing order in a signature if( LastRegister > pParameterInfo[i].Register ) { return E_FAIL; } } LastRegister = 
pParameterInfo[i].Register; } UINT TotalParameterSize = pHeader->Parameters*sizeof(D3D11_SIGNATURE_PARAMETER); UINT TotalCharSumsSize = sizeof(UINT)*cParameters; // char sums for each SemanticName m_pSignatureParameters = (D3D11_SIGNATURE_PARAMETER*)malloc((TotalParameterSize + TotalCharSumsSize + TotalStringLength)*sizeof(BYTE)); if( !m_pSignatureParameters ) { return E_OUTOFMEMORY; } m_OwnParameters = TRUE; m_pSemanticNameCharSums = (UINT*)((BYTE*)m_pSignatureParameters + TotalParameterSize); char* pNextDstString = (char*)((BYTE*)m_pSemanticNameCharSums + TotalCharSumsSize); for( UINT i = 0; i < cParameters; i++ ) { char* pNextSrcString = (char*)((const BYTE*)pSignature + pParameterInfo[i].SemanticName); m_pSignatureParameters[i].Stream = 0; m_pSignatureParameters[i].ComponentType = ConvertToSB(pParameterInfo[i].ComponentType); m_pSignatureParameters[i].Mask = pParameterInfo[i].Mask; m_pSignatureParameters[i].Register = pParameterInfo[i].Register; m_pSignatureParameters[i].SemanticIndex = pParameterInfo[i].SemanticIndex; m_pSignatureParameters[i].SystemValue = ConvertToSB(pParameterInfo[i].SystemValue,pParameterInfo[i].SemanticIndex); m_pSignatureParameters[i].SemanticName = bForceStringReference ? pNextSrcString : pNextDstString; m_pSignatureParameters[i].SemanticIndex = pParameterInfo[i].SemanticIndex; m_pSignatureParameters[i].NeverWrites_Mask = pParameterInfo[i].NeverWrites_Mask; // union with AlwaysReadMask m_pSignatureParameters[i].MinPrecision = pParameterInfo[i].MinPrecision; // This strlen was checked with BoundedStringLength in the first loop. 
#pragma prefast( suppress : __WARNING_PRECONDITION_NULLTERMINATION_VIOLATION ) UINT length = (UINT)strlen(pNextSrcString) + 1; if (!bForceStringReference) { // Calculation of TotalStringLength ensures that we have space in pNextDstString #pragma prefast( suppress : __WARNING_POTENTIAL_BUFFER_OVERFLOW_LOOP_DEPENDENT ) memcpy(pNextDstString, pNextSrcString, length); pNextDstString += length; } m_pSemanticNameCharSums[i] = 0; for( UINT j = 0; j < length; j++ ) { m_pSemanticNameCharSums[i] += tolower(pNextSrcString[j]); } } m_cParameters = cParameters; return S_OK; } HRESULT CSignatureParser::ReadSignature4( __in_bcount(BlobSize) const void* pSignature, UINT BlobSize, bool bForceStringReference ) { if( m_cParameters ) { Cleanup(); } if( !pSignature || BlobSize < sizeof(D3D10_INTERNALSHADER_SIGNATURE) ) { return E_FAIL; } D3D10_INTERNALSHADER_SIGNATURE* pHeader = (D3D10_INTERNALSHADER_SIGNATURE*)pSignature; if( pHeader->Parameters == 0 ) { m_cParameters = 0; return S_OK; } // If parameter count is less than MaxParameters calculation of SignatureAndParameterInfoSize will not overflow. 
const UINT MaxParameters = ((UINT_MAX - sizeof(D3D10_INTERNALSHADER_SIGNATURE)) / sizeof(D3D10_INTERNALSHADER_PARAMETER)); if (pHeader->Parameters > MaxParameters) { return E_FAIL; } UINT ParameterInfoSize = pHeader->Parameters * sizeof(D3D10_INTERNALSHADER_PARAMETER); UINT SignatureAndParameterInfoSize = ParameterInfoSize + sizeof(D3D10_INTERNALSHADER_SIGNATURE); if (BlobSize < SignatureAndParameterInfoSize) { return E_FAIL; } // Keep end pointer around for checking const void *pEnd = static_cast(pSignature) + BlobSize; const D3D10_INTERNALSHADER_PARAMETER* pParameterInfo = (D3D10_INTERNALSHADER_PARAMETER*)((const BYTE*)pSignature + pHeader->ParameterInfo); UINT cParameters = pHeader->Parameters; UINT TotalStringLength = 0; UINT LastRegister = 0; for( UINT i = 0; i < cParameters; i++ ) { UINT StringLength; if (FAILED(BoundedStringLength((const char*)((const BYTE*)pSignature + pParameterInfo[i].SemanticName), (const char*) pEnd, &StringLength))) { return E_FAIL; } if (!bForceStringReference) { TotalStringLength += StringLength + 1; } if( i > 0 ) { // registers must show up in nondecreasing order in a signature if( LastRegister > pParameterInfo[i].Register ) { return E_FAIL; } } LastRegister = pParameterInfo[i].Register; } UINT TotalParameterSize = pHeader->Parameters*sizeof(D3D11_SIGNATURE_PARAMETER); UINT TotalCharSumsSize = sizeof(UINT)*cParameters; // char sums for each SemanticName m_pSignatureParameters = (D3D11_SIGNATURE_PARAMETER*)malloc((TotalParameterSize + TotalCharSumsSize + TotalStringLength)*sizeof(BYTE)); if( !m_pSignatureParameters ) { return E_OUTOFMEMORY; } m_OwnParameters = TRUE; m_pSemanticNameCharSums = (UINT*)((BYTE*)m_pSignatureParameters + TotalParameterSize); char* pNextDstString = (char*)((BYTE*)m_pSemanticNameCharSums + TotalCharSumsSize); for( UINT i = 0; i < cParameters; i++ ) { char* pNextSrcString = (char*)((const BYTE*)pSignature + pParameterInfo[i].SemanticName); m_pSignatureParameters[i].Stream = 0; 
m_pSignatureParameters[i].ComponentType = ConvertToSB(pParameterInfo[i].ComponentType); m_pSignatureParameters[i].Mask = pParameterInfo[i].Mask; m_pSignatureParameters[i].Register = pParameterInfo[i].Register; m_pSignatureParameters[i].SemanticIndex = pParameterInfo[i].SemanticIndex; m_pSignatureParameters[i].SystemValue = ConvertToSB(pParameterInfo[i].SystemValue,pParameterInfo[i].SemanticIndex); m_pSignatureParameters[i].SemanticName = bForceStringReference ? pNextSrcString : pNextDstString; m_pSignatureParameters[i].SemanticIndex = pParameterInfo[i].SemanticIndex; m_pSignatureParameters[i].NeverWrites_Mask = pParameterInfo[i].NeverWrites_Mask; // union with AlwaysReadMask m_pSignatureParameters[i].MinPrecision = D3D_MIN_PRECISION_DEFAULT; // This strlen was checked with BoundedStringLength in the first loop. #pragma prefast( suppress : __WARNING_PRECONDITION_NULLTERMINATION_VIOLATION ) UINT length = (UINT)strlen(pNextSrcString) + 1; if (!bForceStringReference) { // Calculation of TotalStringLength ensures that we have space in pNextDstString #pragma prefast( suppress : __WARNING_POTENTIAL_BUFFER_OVERFLOW_LOOP_DEPENDENT ) memcpy(pNextDstString, pNextSrcString, length); pNextDstString += length; } m_pSemanticNameCharSums[i] = 0; for( UINT j = 0; j < length; j++ ) { m_pSemanticNameCharSums[i] += tolower(pNextSrcString[j]); } } m_cParameters = cParameters; return S_OK; } HRESULT CSignatureParser::ReadSignature11_1( D3D11_SIGNATURE_PARAMETER* pParamList, UINT* pCharSums, UINT NumParameters ) { if( m_cParameters ) { Cleanup(); } if( !pParamList ) { return S_OK; } m_pSignatureParameters = pParamList; m_pSemanticNameCharSums = pCharSums; m_cParameters = NumParameters; m_OwnParameters = FALSE; return S_OK; } HRESULT CSignatureParser::ReadSignature5( D3D11_SIGNATURE_PARAMETER* pParamList, UINT* pCharSums, UINT NumParameters ) { if( m_cParameters ) { Cleanup(); } if( !pParamList ) { return S_OK; } m_pSignatureParameters = pParamList; m_pSemanticNameCharSums = pCharSums; 
m_cParameters = NumParameters; m_OwnParameters = FALSE; return S_OK; } //--------------------------------------------------------------------------------------------------------------------------------- // CSignatureParser5::Init() void CSignatureParser5::Init() { m_NumSigs = 0; m_cParameters = 0; m_pSignatureParameters = NULL; m_RasterizedStream = 0; for( UINT i=0; i < D3D11_SO_STREAM_COUNT; i++ ) { m_Sig[i].Init(); } } //--------------------------------------------------------------------------------------------------------------------------------- // CSignatureParser5::Cleanup() void CSignatureParser5::Cleanup() { for( UINT i = 0; i < D3D11_SO_STREAM_COUNT; i++ ) { if( m_Sig[i].m_cParameters ) { m_Sig[i].Cleanup(); } } if( m_pSignatureParameters ) { free( m_pSignatureParameters ); m_pSignatureParameters = NULL; } m_NumSigs = 0; m_RasterizedStream = 0; m_cParameters = 0; } //--------------------------------------------------------------------------------------------------------------------------------- // CSignatureParser::ReadSignature11_1() HRESULT CSignatureParser5::ReadSignature11_1( __in_bcount(BlobSize) const void* pSignature, UINT BlobSize, bool bForceStringReference ) { Cleanup(); if( !pSignature || BlobSize < sizeof(D3D10_INTERNALSHADER_SIGNATURE) ) { return E_FAIL; } D3D10_INTERNALSHADER_SIGNATURE* pHeader = (D3D10_INTERNALSHADER_SIGNATURE*)pSignature; if( pHeader->Parameters == 0 ) { m_cParameters = 0; return S_OK; } // If parameter count is less than MaxParameters calculation of SignatureAndParameterInfoSize will not overflow. 
const UINT MaxParameters = ((UINT_MAX - sizeof(D3D10_INTERNALSHADER_SIGNATURE)) / sizeof(D3D11_INTERNALSHADER_PARAMETER_11_1)); if (pHeader->Parameters > MaxParameters) { return E_FAIL; } UINT ParameterInfoSize = pHeader->Parameters * sizeof(D3D11_INTERNALSHADER_PARAMETER_11_1); UINT SignatureAndParameterInfoSize = ParameterInfoSize + sizeof(D3D10_INTERNALSHADER_SIGNATURE); if (BlobSize < SignatureAndParameterInfoSize) { return E_FAIL; } // Keep end pointer around for checking const void *pEnd = static_cast(pSignature) + BlobSize; const D3D11_INTERNALSHADER_PARAMETER_11_1* pParameterInfo = (D3D11_INTERNALSHADER_PARAMETER_11_1*)((const BYTE*)pSignature + pHeader->ParameterInfo); UINT cParameters = pHeader->Parameters; UINT TotalStringLength = 0; UINT StreamParameters[D3D11_SO_STREAM_COUNT] = {0}; UINT i = 0; for( UINT s = 0; s < D3D11_SO_STREAM_COUNT; s++ ) { UINT LastRegister = 0; UINT FirstParameter = i; for( ; i < cParameters; i++ ) { if( pParameterInfo[i].Stream != s ) { break; } StreamParameters[s]++; UINT StringLength; if (FAILED(BoundedStringLength((const char*)((const BYTE*)pSignature + pParameterInfo[i].SemanticName), (const char*) pEnd, &StringLength))) { return E_FAIL; } if (!bForceStringReference) { TotalStringLength += StringLength + 1; } if( i > FirstParameter ) { // registers must show up in nondecreasing order in a signature if( LastRegister > pParameterInfo[i].Register ) { return E_FAIL; } } LastRegister = pParameterInfo[i].Register; m_NumSigs = s + 1; } } UINT TotalParameterSize = pHeader->Parameters*sizeof(D3D11_SIGNATURE_PARAMETER); UINT TotalCharSumsSize = sizeof(UINT)*cParameters; // char sums for each SemanticName m_pSignatureParameters = (D3D11_SIGNATURE_PARAMETER*)malloc((TotalParameterSize + TotalCharSumsSize + TotalStringLength)*sizeof(BYTE)); if( !m_pSignatureParameters ) { return E_OUTOFMEMORY; } m_pSemanticNameCharSums = (UINT*)((BYTE*)m_pSignatureParameters + TotalParameterSize); char* pNextDstString = 
(char*)((BYTE*)m_pSemanticNameCharSums + TotalCharSumsSize); for( i = 0; i < cParameters; i++ ) { char* pNextSrcString = (char*)((const BYTE*)pSignature + pParameterInfo[i].SemanticName); m_pSignatureParameters[i].Stream = pParameterInfo[i].Stream; m_pSignatureParameters[i].ComponentType = ConvertToSB(pParameterInfo[i].ComponentType); m_pSignatureParameters[i].Mask = pParameterInfo[i].Mask; m_pSignatureParameters[i].Register = pParameterInfo[i].Register; m_pSignatureParameters[i].SemanticIndex = pParameterInfo[i].SemanticIndex; m_pSignatureParameters[i].SystemValue = ConvertToSB(pParameterInfo[i].SystemValue,pParameterInfo[i].SemanticIndex); m_pSignatureParameters[i].SemanticName = bForceStringReference ? pNextSrcString : pNextDstString; m_pSignatureParameters[i].SemanticIndex = pParameterInfo[i].SemanticIndex; m_pSignatureParameters[i].NeverWrites_Mask = pParameterInfo[i].NeverWrites_Mask; // union with AlwaysReadMask m_pSignatureParameters[i].MinPrecision = pParameterInfo[i].MinPrecision; // This strlen was checked with BoundedStringLength in the first loop. 
#pragma prefast( suppress : __WARNING_PRECONDITION_NULLTERMINATION_VIOLATION ) UINT length = (UINT)strlen(pNextSrcString) + 1; if (!bForceStringReference) { __analysis_assume((char*)pNextDstString + length < (char*)m_pSignatureParameters + (TotalParameterSize + TotalCharSumsSize + TotalStringLength) * sizeof(BYTE)); // Calculation of TotalStringLength ensures that we have space in pNextDstString #pragma prefast( suppress : __WARNING_POTENTIAL_BUFFER_OVERFLOW_LOOP_DEPENDENT ) memcpy(pNextDstString, pNextSrcString, length); pNextDstString += length; } m_pSemanticNameCharSums[i] = 0; for( UINT j = 0; j < length; j++ ) { m_pSemanticNameCharSums[i] += tolower(pNextSrcString[j]); } } m_cParameters = cParameters; UINT PreviousParams = 0; for( i = 0; i < m_NumSigs; i++ ) { m_Sig[i].ReadSignature11_1( &m_pSignatureParameters[PreviousParams], &m_pSemanticNameCharSums[PreviousParams], StreamParameters[i] ); PreviousParams += StreamParameters[i]; } return S_OK; } //--------------------------------------------------------------------------------------------------------------------------------- // CSignatureParser::ReadSignature5() HRESULT CSignatureParser5::ReadSignature5( __in_bcount(BlobSize) const void* pSignature, UINT BlobSize, bool bForceStringReference ) { Cleanup(); if( !pSignature || BlobSize < sizeof(D3D10_INTERNALSHADER_SIGNATURE) ) { return E_FAIL; } D3D10_INTERNALSHADER_SIGNATURE* pHeader = (D3D10_INTERNALSHADER_SIGNATURE*)pSignature; if( pHeader->Parameters == 0 ) { m_cParameters = 0; return S_OK; } // If parameter count is less than MaxParameters calculation of SignatureAndParameterInfoSize will not overflow. 
const UINT MaxParameters = ((UINT_MAX - sizeof(D3D10_INTERNALSHADER_SIGNATURE)) / sizeof(D3D11_INTERNALSHADER_PARAMETER_FOR_GS)); if (pHeader->Parameters > MaxParameters) { return E_FAIL; } UINT ParameterInfoSize = pHeader->Parameters * sizeof(D3D11_INTERNALSHADER_PARAMETER_FOR_GS); UINT SignatureAndParameterInfoSize = ParameterInfoSize + sizeof(D3D10_INTERNALSHADER_SIGNATURE); if (BlobSize < SignatureAndParameterInfoSize) { return E_FAIL; } // Keep end pointer around for checking const void *pEnd = static_cast(pSignature) + BlobSize; const D3D11_INTERNALSHADER_PARAMETER_FOR_GS* pParameterInfo = (D3D11_INTERNALSHADER_PARAMETER_FOR_GS*)((const BYTE*)pSignature + pHeader->ParameterInfo); UINT cParameters = pHeader->Parameters; UINT TotalStringLength = 0; UINT StreamParameters[D3D11_SO_STREAM_COUNT] = {0}; UINT i = 0; for( UINT s = 0; s < D3D11_SO_STREAM_COUNT; s++ ) { UINT LastRegister = 0; UINT FirstParameter = i; for( ; i < cParameters; i++ ) { if( pParameterInfo[i].Stream != s ) { break; } StreamParameters[s]++; UINT StringLength; if (FAILED(BoundedStringLength((const char*)((const BYTE*)pSignature + pParameterInfo[i].SemanticName), (const char*) pEnd, &StringLength))) { return E_FAIL; } if (!bForceStringReference) { TotalStringLength += StringLength + 1; } if( i > FirstParameter ) { // registers must show up in nondecreasing order in a signature if( LastRegister > pParameterInfo[i].Register ) { return E_FAIL; } } LastRegister = pParameterInfo[i].Register; m_NumSigs = s + 1; } } UINT TotalParameterSize = pHeader->Parameters*sizeof(D3D11_SIGNATURE_PARAMETER); UINT TotalCharSumsSize = sizeof(UINT)*cParameters; // char sums for each SemanticName m_pSignatureParameters = (D3D11_SIGNATURE_PARAMETER*)malloc((TotalParameterSize + TotalCharSumsSize + TotalStringLength)*sizeof(BYTE)); if( !m_pSignatureParameters ) { return E_OUTOFMEMORY; } m_pSemanticNameCharSums = (UINT*)((BYTE*)m_pSignatureParameters + TotalParameterSize); char* pNextDstString = 
(char*)((BYTE*)m_pSemanticNameCharSums + TotalCharSumsSize); for( i = 0; i < cParameters; i++ ) { char* pNextSrcString = (char*)((const BYTE*)pSignature + pParameterInfo[i].SemanticName); m_pSignatureParameters[i].Stream = pParameterInfo[i].Stream; m_pSignatureParameters[i].ComponentType = ConvertToSB(pParameterInfo[i].ComponentType); m_pSignatureParameters[i].Mask = pParameterInfo[i].Mask; m_pSignatureParameters[i].Register = pParameterInfo[i].Register; m_pSignatureParameters[i].SemanticIndex = pParameterInfo[i].SemanticIndex; m_pSignatureParameters[i].SystemValue = ConvertToSB(pParameterInfo[i].SystemValue,pParameterInfo[i].SemanticIndex); m_pSignatureParameters[i].SemanticName = pNextDstString; m_pSignatureParameters[i].SemanticIndex = pParameterInfo[i].SemanticIndex; m_pSignatureParameters[i].NeverWrites_Mask = pParameterInfo[i].NeverWrites_Mask; // union with AlwaysReadMask m_pSignatureParameters[i].MinPrecision = D3D_MIN_PRECISION_DEFAULT; // This strlen was checked with BoundedStringLength in the first loop. 
#pragma prefast( suppress : __WARNING_PRECONDITION_NULLTERMINATION_VIOLATION ) UINT length = (UINT)strlen(pNextSrcString) + 1; if (!bForceStringReference) { __analysis_assume((char*)pNextDstString + length < (char*)m_pSignatureParameters + (TotalParameterSize + TotalCharSumsSize + TotalStringLength) * sizeof(BYTE)); // Calculation of TotalStringLength ensures that we have space in pNextDstString #pragma prefast( suppress : __WARNING_POTENTIAL_BUFFER_OVERFLOW_LOOP_DEPENDENT ) memcpy(pNextDstString, pNextSrcString, length); pNextDstString += length; } m_pSemanticNameCharSums[i] = 0; for( UINT j = 0; j < length; j++ ) { m_pSemanticNameCharSums[i] += tolower(pNextSrcString[j]); } } m_cParameters = cParameters; UINT PreviousParams = 0; for( i = 0; i < m_NumSigs; i++ ) { m_Sig[i].ReadSignature5( &m_pSignatureParameters[PreviousParams], &m_pSemanticNameCharSums[PreviousParams], StreamParameters[i] ); PreviousParams += StreamParameters[i]; } return S_OK; } HRESULT CSignatureParser5::ReadSignature4( const void* pSignature, UINT BlobSize, bool bForceStringReference ) { Cleanup(); if( !pSignature ) { return E_FAIL; } if( FAILED( m_Sig[0].ReadSignature4( pSignature, BlobSize, bForceStringReference ) ) ) { return E_FAIL; } m_pSignatureParameters = m_Sig[0].m_pSignatureParameters; m_Sig[0].m_OwnParameters = FALSE; // steal the signature from our child m_pSemanticNameCharSums = m_Sig[0].m_pSemanticNameCharSums; m_cParameters = m_Sig[0].m_cParameters; m_RasterizedStream = 0; m_NumSigs = 1; return S_OK; } //--------------------------------------------------------------------------------------------------------------------------------- // CSignatureParser::GetParameters() UINT CSignatureParser::GetParameters( D3D11_SIGNATURE_PARAMETER const** ppParameters ) const { if( ppParameters ) { *ppParameters = m_pSignatureParameters; } return m_cParameters; } //--------------------------------------------------------------------------------------------------------------------------------- // 
LowerCaseCharSum static UINT LowerCaseCharSum(LPCSTR pStr) { if (!pStr) return 0; UINT sum = 0; while (*pStr != '\0') { sum += tolower(*pStr); pStr++; } return sum; } //--------------------------------------------------------------------------------------------------------------------------------- // CSignatureParser::FindParameter() HRESULT CSignatureParser::FindParameter( LPCSTR SemanticName, UINT SemanticIndex, D3D11_SIGNATURE_PARAMETER** ppFoundParameter ) const { UINT InputNameCharSum = LowerCaseCharSum(SemanticName); for(UINT i = 0; i < m_cParameters; i++ ) { if( (SemanticIndex == m_pSignatureParameters[i].SemanticIndex) && (InputNameCharSum == m_pSemanticNameCharSums[i]) && (_stricmp(SemanticName,m_pSignatureParameters[i].SemanticName) == 0) ) { if(ppFoundParameter) *ppFoundParameter = &m_pSignatureParameters[i]; return S_OK; } } if(ppFoundParameter) *ppFoundParameter = NULL; return E_FAIL; } //--------------------------------------------------------------------------------------------------------------------------------- // CSignatureParser::FindParameterRegister() HRESULT CSignatureParser::FindParameterRegister( LPCSTR SemanticName, UINT SemanticIndex, UINT* pFoundParameterRegister ) { UINT InputNameCharSum = LowerCaseCharSum(SemanticName); for(UINT i = 0; i < m_cParameters; i++ ) { if( (SemanticIndex == m_pSignatureParameters[i].SemanticIndex) && (InputNameCharSum == m_pSemanticNameCharSums[i]) && (_stricmp(SemanticName,m_pSignatureParameters[i].SemanticName) == 0) ) { if(pFoundParameterRegister) *pFoundParameterRegister = m_pSignatureParameters[i].Register; return S_OK; } } return E_FAIL; } //--------------------------------------------------------------------------------------------------------------------------------- // CSignatureParser::GetSemanticNameCharSum() UINT CSignatureParser::GetSemanticNameCharSum( UINT parameter ) { if( parameter >= m_cParameters ) { return 0; } return m_pSemanticNameCharSums[parameter]; } 
//--------------------------------------------------------------------------------------------------------------------------------- // CSignatureParser::CanOutputTo() bool CSignatureParser::CanOutputTo( CSignatureParser* pTargetSignature ) { if( !pTargetSignature ) { return false; } const D3D11_SIGNATURE_PARAMETER* pDstParam; UINT cTargetParameters = pTargetSignature->GetParameters(&pDstParam); if( cTargetParameters > m_cParameters ) { return false; } assert(cTargetParameters <= 64); // if cTargetParameters is allowed to be much larger, rethink this n^2 algorithm. for( UINT i = 0; i < cTargetParameters; i++ ) { bool bFoundMatch = false; for( UINT j = 0; j < m_cParameters; j++ ) { UINT srcIndex = (i + j) % m_cParameters; // start at the same location in the src as the dest, but loop through all. D3D11_SIGNATURE_PARAMETER* pSrcParam = &(m_pSignatureParameters[srcIndex]); if( ( m_pSemanticNameCharSums[srcIndex] == pTargetSignature->m_pSemanticNameCharSums[i] ) && ( _stricmp(pSrcParam->SemanticName, pDstParam->SemanticName) == 0 ) && ( pSrcParam->SemanticIndex == pDstParam->SemanticIndex ) && ( pSrcParam->Register == pDstParam->Register ) && ( pSrcParam->SystemValue == pDstParam->SystemValue ) && ( pSrcParam->ComponentType == pDstParam->ComponentType ) && ( ( pSrcParam->Mask & pDstParam->Mask ) == pDstParam->Mask ) && // Check shader dependent read/write of input/output... 
// If the output shader never writes a value and the input always reads it, that's a problem: !((pSrcParam->NeverWrites_Mask & pSrcParam->Mask) & (pDstParam->AlwaysReads_Mask & pDstParam->Mask)) ) { bFoundMatch = true; break; } } if( !bFoundMatch ) { return false; } pDstParam++; } return true; } //--------------------------------------------------------------------------------------------------------------------------------- // Clear out AlwaysReads_Mask / NeverWrites_Mask for all elements in the // signature to force the signature not to cause linkage errors when // passing the signature around after extracting it from a shader. void CSignatureParser::ClearAlwaysReadsNeverWritesMask() { for( UINT i = 0; i < m_cParameters; i++ ) { m_pSignatureParameters[i].NeverWrites_Mask = 0; // union with AlwaysReads_Mask } } ================================================ FILE: LICENSE ================================================ Copyright (c) Microsoft Corporation. MIT License Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # D3D12 Translation Layer The D3D12 Translation Layer is a helper library for translating graphics concepts and commands from a D3D11-style domain to a D3D12-style domain. A D3D11-style application generally: * Records graphics commands in a single-threaded manner. * Treats the CPU and GPU timeline as a single timeline. While there are some places where the asynchronicity of the GPU is exposed (e.g. queries or do-not-wait semantics), for the most part, a D3D11-style application can remain unaware of the fact that commands are recorded and executed at a later point in time. * Related to this is the fact that CPU-visible memory contents must be preserved from the time the CPU wrote them, until after the GPU has finished reading them in order to maintain the illusion of a single timeline. * Creates individual state objects (e.g. blend state, rasterizer state) and compiles individual shaders, and only at the time when Draw is invoked does the application provide the full set of state which will be used. * Ignores GPU parallelism and pipelining, trusting that driver introspection will maximize GPU utilization by parallelizing when possible, while preserving D3D11 semantics by synchronizing when necessary. In contrast, a D3D12-style application must: * Be aware of GPU asynchronicity, and manually synchronize the CPU and GPU. * Be aware of GPU parallelism, and manually synchronize/barrier/transition resources from one usage to another. * Manage memory, including allocation, deallocation, and "renaming" (discussed further below).
* Provide large bundles of state (called pipeline state objects in D3D12) all at once to enable cross-pipeline compilation and optimization. To that end, this library provides an implementation of an API that looks like [D3D11](https://docs.microsoft.com/en-us/windows/win32/api/_direct3d11/), and submits work to [D3D12](https://docs.microsoft.com/en-us/windows/win32/api/_direct3d12/). Make sure that you visit the [DirectX Landing Page](https://devblogs.microsoft.com/directx/landing-page/) for more resources for DirectX developers. ## Project Background This project was started during the development of Windows 10 and D3D12. The Windows graphics team has a large set of D3D11 content which was heavily utilized during design and bringup of the D3D12 runtime and driver models. In order to use that content, a mapping layer, named D3D11On12, was developed. This mapping layer proved successful and useful, to the point that a second mapping layer was developed, named D3D9On12. As the name implies, this maps from D3D9 to D3D12, and has to solve a lot of the same problems as D3D11On12. So, D3D11On12 was refactored into two pieces: a part that implements the D3D11-specific concepts, and a more general part that translates more traditional graphics constructs into a modern low-level D3D12 API consumer. This more general part is what became the D3D12TranslationLayer. This code is currently being used by two mapping layers that ship as part of Windows: D3D11On12 and D3D9On12. In addition to the core D3D12TranslationLayer code, we also have released the source to D3D11On12, to serve as an example of how to consume this library. ## What does this do? This translation layer provides the following high-level constructs (and more) for applications to use: * Resource binding The D3D12 resource binding model is quite different from D3D11 and prior. 
Rather than having a flat array of resources set on the pipeline which map 1:1 with shader registers, D3D12 takes a more flexible approach which is also closer to modern hardware. The translation layer takes care of figuring out which registers a shader needs, managing root signatures, populating descriptor heaps/tables, and setting up null descriptors for unbound resources. * Resource renaming D3D11 and older have a concept of `DISCARD` CPU access patterns, where the CPU populates a resource, instructs the GPU to read from it, and then immediately populates new contents without waiting for the GPU to read the old ones. This pattern is typically implemented via a pattern called "renaming", where new memory is allocated during the `DISCARD` operation, and all future references to that resource in the API will point to the new memory rather than the old. The translation layer provides a separation of a resource from its "identity," which enables cheap swapping of the underlying memory of a resource for that of another one without having to recreate views or rebind them. It also provides easy access to rename operations (allocate new memory with the same properties as the current, and swap their identities). * Resource suballocation, pooling, and deferred destruction D3D11-style apps can destroy objects immediately after instructing the GPU to do something with them. D3D12 requires applications to hold on to memory and GPU objects until the GPU has finished accessing them. Additionally, D3D11 apps suffer no penalty from allocating small resources (e.g. 16-byte buffers), where D3D12 apps must recognize that such small allocations are infeasible and should be suballocated from larger resources. Furthermore, constantly creating and destroying resources is a common pattern in D3D11, but in D3D12 this can quickly become expensive. The translation layer handles all of these abstractions seamlessly. 
* Batching and threading Since D3D11 patterns generally require applications to record all graphics commands on a single thread, there are often other CPU cores that are idle. To improve utilization, the translation layer provides a batching layer which can sit on top of the immediate context, moving the majority of work to a second thread so it can be parallelized. It also provides threadpool-based helpers for offloading PSO compilation to worker threads. Combining these means that compilations can be kicked off at draw-time on the application thread, and only the batching thread needs to wait for them to be completed. Meanwhile, other PSO compilations are starting or completing, minimizing the wall clock time spent compiling shaders. * Residency management This layer incorporates the open-source residency management library to improve utilization on low-memory systems. ## Building This project produces a lib named D3D12TranslationLayer.lib. Additionally, if the [WDK](https://docs.microsoft.com/en-us/windows-hardware/drivers/download-the-wdk) is installed in addition to the Windows SDK, a second project for a second lib named D3D12TranslationLayer_WDK.lib will be created. The D3D12TranslationLayer project requires C++17, and only supports building with MSVC at the moment. ## Contributing This project welcomes contributions. See [CONTRIBUTING](CONTRIBUTING.md) for more information. Contributions to this project will flow back to the D3D11On12 and D3D9On12 mapping layers included in Windows 10. ## Roadmap There are three items currently on the roadmap: 1. Refactoring for additional componentization - currently the translation layer is largely implemented by a monolithic class called the `ImmediateContext`. 
It would be difficult for an application consumer to pick and choose bits and pieces of functionality provided by this class, but that would be desirable to ease application porting to D3D12 while enabling the application to take on only those responsibilities with which they can achieve improved performance through app-specific information. The high-level thinking here is to require consumers to have an `ImmediateContext` object, and have sub-components that are registered with that context. For example, the resource state tracker component need not always be present, and applications could provide explicit resource barriers rather than relying on the `ImmediateContext` to do it for them. A key constraint on this componentization is that it should not negatively impact performance. 2. Supporting initial data upload on `D3D12_COMMAND_LIST_TYPE_COPY` for discrete GPUs, and using `WriteToSubresource` for UMA (integrated) GPUs. This should improve performance. 3. Supporting multi-GPU scenarios, specifically, using multiple nodes on a single D3D12 device. Currently, the D3D12TranslationLayer only supports one node, though it can be a node other than node 0. Other suggestions or contributions are welcome. ## Data Collection The software may collect information about you and your use of the software and send it to Microsoft. Microsoft may use this information to provide services and improve our products and services. You may turn off the telemetry as described in the repository. There are also some features in the software that may enable you and Microsoft to collect data from users of your applications. If you use these features, you must comply with applicable law, including providing appropriate notices to users of your applications together with a copy of Microsoft's privacy statement. Our privacy statement is located at https://go.microsoft.com/fwlink/?LinkID=824704. You can learn more about data collection and use in the help documentation and our privacy statement.
Your use of the software operates as your consent to these practices. Specifically: The `g_hTracelogging` variable has events emitted against it with keywords which may trigger telemetry to be sent, depending on the configuration and registration of the tracelogging provider which is used. If no tracelogging provider is specified (using `D3D12TranslationLayer::SetTraceloggingProvider`) or if the specified provider is not configured for telemetry, then no telemetry will be sent. In the default configuration, no provider is created and no data is sent. ================================================ FILE: SECURITY.md ================================================ ## Security Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. ## Reporting Security Issues **Please do not report security vulnerabilities through public GitHub issues.** Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). You should receive a response within 24 hours. 
If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) * Full paths of source file(s) related to the manifestation of the issue * The location of the affected source code (tag/branch/commit or direct URL) * Any special configuration required to reproduce the issue * Step-by-step instructions to reproduce the issue * Proof-of-concept or exploit code (if possible) * Impact of the issue, including how an attacker might exploit the issue This information will help us triage your report more quickly. If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. ## Preferred Languages We prefer all communications to be in English. ## Policy Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). ================================================ FILE: external/MicrosoftTelemetry.h ================================================ /* ++ Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT License. See LICENSE in the project root for license information. Module Name: TraceLoggingConfig.h Abstract: Macro definitions used by this project's TraceLogging ETW providers: - Configuration macros that select the ETW Provider Groups to be used by this project. - Constants for tags that are commonly used in Microsoft's TraceLogging-based ETW. 
Different versions of this file use different definitions for the TraceLoggingOption configuration macros. The definitions in this file are empty. As a result, providers using this configuration file will not join any ETW Provider Groups and will not be given any special treatment by group-sensitive ETW listeners. Environment: User mode or kernel mode. --*/ #pragma once // Configuration macro for use in TRACELOGGING_DEFINE_PROVIDER. The definition // in this file configures the provider as a normal (non-telemetry) provider. #define TraceLoggingOptionMicrosoftTelemetry() \ // Empty definition for TraceLoggingOptionMicrosoftTelemetry // Configuration macro for use in TRACELOGGING_DEFINE_PROVIDER. The definition // in this file configures the provider as a normal (non-telemetry) provider. #define TraceLoggingOptionWindowsCoreTelemetry() \ // Empty definition for TraceLoggingOptionWindowsCoreTelemetry // Event privacy tags. Use the PDT macro values for the tag parameter, e.g.: // TraceLoggingWrite(..., // TelemetryPrivacyDataTag(PDT_BrowsingHistory | PDT_ProductAndServiceUsage), // ...); #define TelemetryPrivacyDataTag(tag) TraceLoggingUInt64((tag), "PartA_PrivTags") #define PDT_BrowsingHistory 0x0000000000000002u #define PDT_DeviceConnectivityAndConfiguration 0x0000000000000800u #define PDT_InkingTypingAndSpeechUtterance 0x0000000000020000u #define PDT_ProductAndServicePerformance 0x0000000001000000u #define PDT_ProductAndServiceUsage 0x0000000002000000u #define PDT_SoftwareSetupAndInventory 0x0000000080000000u // Event categories specified via keywords, e.g.: // TraceLoggingWrite(..., // TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES), // ...); #define MICROSOFT_KEYWORD_CRITICAL_DATA 0x0000800000000000 // Bit 47 #define MICROSOFT_KEYWORD_MEASURES 0x0000400000000000 // Bit 46 #define MICROSOFT_KEYWORD_TELEMETRY 0x0000200000000000 // Bit 45 #define MICROSOFT_KEYWORD_RESERVED_44 0x0000100000000000 // Bit 44 (reserved for future assignment) // Event categories specified 
via event tags, e.g.: // TraceLoggingWrite(..., // TraceLoggingEventTag(MICROSOFT_EVENTTAG_REALTIME_LATENCY), // ...); #define MICROSOFT_EVENTTAG_DROP_USER_IDS 0x00008000 #define MICROSOFT_EVENTTAG_AGGREGATE 0x00010000 #define MICROSOFT_EVENTTAG_DROP_PII_EXCEPT_IP 0x00020000 #define MICROSOFT_EVENTTAG_COSTDEFERRED_LATENCY 0x00040000 #define MICROSOFT_EVENTTAG_CORE_DATA 0x00080000 #define MICROSOFT_EVENTTAG_INJECT_XTOKEN 0x00100000 #define MICROSOFT_EVENTTAG_REALTIME_LATENCY 0x00200000 #define MICROSOFT_EVENTTAG_NORMAL_LATENCY 0x00400000 #define MICROSOFT_EVENTTAG_CRITICAL_PERSISTENCE 0x00800000 #define MICROSOFT_EVENTTAG_NORMAL_PERSISTENCE 0x01000000 #define MICROSOFT_EVENTTAG_DROP_PII 0x02000000 #define MICROSOFT_EVENTTAG_HASH_PII 0x04000000 #define MICROSOFT_EVENTTAG_MARK_PII 0x08000000 // Field categories specified via field tags, e.g.: // TraceLoggingWrite(..., // TraceLoggingString(szUser, "UserName", "User's name", MICROSOFT_FIELDTAG_HASH_PII), // ...); #define MICROSOFT_FIELDTAG_DROP_PII 0x04000000 #define MICROSOFT_FIELDTAG_HASH_PII 0x08000000 ================================================ FILE: external/d3d12compatibility.h ================================================ /*------------------------------------------------------------------------------------- * * Copyright (c) Microsoft Corporation * *-------------------------------------------------------------------------------------*/ /* this ALWAYS GENERATED file contains the definitions for the interfaces */ /* File created by MIDL compiler version 8.01.0622 */ /* verify that the version is high enough to compile this file*/ #ifndef __REQUIRED_RPCNDR_H_VERSION__ #define __REQUIRED_RPCNDR_H_VERSION__ 500 #endif /* verify that the version is high enough to compile this file*/ #ifndef __REQUIRED_RPCSAL_H_VERSION__ #define __REQUIRED_RPCSAL_H_VERSION__ 100 #endif #include "rpc.h" #include "rpcndr.h" #ifndef __RPCNDR_H_VERSION__ #error this stub requires an updated version of #endif /* 
__RPCNDR_H_VERSION__ */ #ifndef COM_NO_WINDOWS_H #include "windows.h" #include "ole2.h" #endif /*COM_NO_WINDOWS_H*/ #ifndef __d3d12compatibility_h__ #define __d3d12compatibility_h__ #if defined(_MSC_VER) && (_MSC_VER >= 1020) #pragma once #endif /* Forward Declarations */ #ifndef __ID3D12CompatibilityDevice_FWD_DEFINED__ #define __ID3D12CompatibilityDevice_FWD_DEFINED__ typedef interface ID3D12CompatibilityDevice ID3D12CompatibilityDevice; #endif /* __ID3D12CompatibilityDevice_FWD_DEFINED__ */ #ifndef __ID3D12CompatibilityQueue_FWD_DEFINED__ #define __ID3D12CompatibilityQueue_FWD_DEFINED__ typedef interface ID3D12CompatibilityQueue ID3D12CompatibilityQueue; #endif /* __ID3D12CompatibilityQueue_FWD_DEFINED__ */ /* header files for imported files */ #include "oaidl.h" #include "ocidl.h" #include "d3d11on12.h" #ifdef __cplusplus extern "C"{ #endif /* interface __MIDL_itf_d3d12compatibility_0000_0000 */ /* [local] */ #include #pragma region Desktop Family #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) typedef enum D3D12_COMPATIBILITY_SHARED_FLAGS { D3D12_COMPATIBILITY_SHARED_FLAG_NONE = 0, D3D12_COMPATIBILITY_SHARED_FLAG_NON_NT_HANDLE = 0x1, D3D12_COMPATIBILITY_SHARED_FLAG_KEYED_MUTEX = 0x2, D3D12_COMPATIBILITY_SHARED_FLAG_9_ON_12 = 0x4 } D3D12_COMPATIBILITY_SHARED_FLAGS; DEFINE_ENUM_FLAG_OPERATORS( D3D12_COMPATIBILITY_SHARED_FLAGS ); typedef enum D3D12_REFLECT_SHARED_PROPERTY { D3D12_REFLECT_SHARED_PROPERTY_D3D11_RESOURCE_FLAGS = 0, D3D12_REFELCT_SHARED_PROPERTY_COMPATIBILITY_SHARED_FLAGS = ( D3D12_REFLECT_SHARED_PROPERTY_D3D11_RESOURCE_FLAGS + 1 ) , D3D12_REFLECT_SHARED_PROPERTY_NON_NT_SHARED_HANDLE = ( D3D12_REFELCT_SHARED_PROPERTY_COMPATIBILITY_SHARED_FLAGS + 1 ) } D3D12_REFLECT_SHARED_PROPERTY; extern RPC_IF_HANDLE __MIDL_itf_d3d12compatibility_0000_0000_v0_0_c_ifspec; extern RPC_IF_HANDLE __MIDL_itf_d3d12compatibility_0000_0000_v0_0_s_ifspec; #ifndef __ID3D12CompatibilityDevice_INTERFACE_DEFINED__ #define __ID3D12CompatibilityDevice_INTERFACE_DEFINED__ /* 
interface ID3D12CompatibilityDevice */
/* [unique][local][object][uuid] */


EXTERN_C const IID IID_ID3D12CompatibilityDevice;

#if defined(__cplusplus) && !defined(CINTERFACE)

    /* C++ binding: three pure-virtual methods declared on top of IUnknown. */
    MIDL_INTERFACE("8f1c0e3c-fae3-4a82-b098-bfe1708207ff")
    ID3D12CompatibilityDevice : public IUnknown
    {
    public:
        virtual HRESULT STDMETHODCALLTYPE CreateSharedResource(
            _In_  const D3D12_HEAP_PROPERTIES *pHeapProperties,
            D3D12_HEAP_FLAGS HeapFlags,
            _In_  const D3D12_RESOURCE_DESC *pDesc,
            D3D12_RESOURCE_STATES InitialResourceState,
            _In_opt_  const D3D12_CLEAR_VALUE *pOptimizedClearValue,
            _In_opt_  const D3D11_RESOURCE_FLAGS *pFlags11,
            D3D12_COMPATIBILITY_SHARED_FLAGS CompatibilityFlags,
            _In_opt_  ID3D12LifetimeTracker *pLifetimeTracker,
            _In_opt_  ID3D12SwapChainAssistant *pOwningSwapchain,
            REFIID riid,
            _COM_Outptr_opt_  void **ppResource) = 0;

        virtual HRESULT STDMETHODCALLTYPE CreateSharedHeap(
            _In_  const D3D12_HEAP_DESC *pHeapDesc,
            D3D12_COMPATIBILITY_SHARED_FLAGS CompatibilityFlags,
            REFIID riid,
            _COM_Outptr_opt_  void **ppHeap) = 0;

        virtual HRESULT STDMETHODCALLTYPE ReflectSharedProperties(
            _In_  ID3D12Object *pHeapOrResource,
            D3D12_REFLECT_SHARED_PROPERTY ReflectType,
            _Out_writes_bytes_(DataSize)  void *pData,
            UINT DataSize) = 0;

    };


#else 	/* C style interface */

    /* C binding: explicit vtable. Entries are QueryInterface / AddRef / Release
       followed by the interface methods in the same order as the C++ binding;
       each takes the object as an explicit `This` first argument. */
    typedef struct ID3D12CompatibilityDeviceVtbl
    {
        BEGIN_INTERFACE

        HRESULT ( STDMETHODCALLTYPE *QueryInterface )(
            ID3D12CompatibilityDevice * This,
            REFIID riid,
            _COM_Outptr_  void **ppvObject);

        ULONG ( STDMETHODCALLTYPE *AddRef )(
            ID3D12CompatibilityDevice * This);

        ULONG ( STDMETHODCALLTYPE *Release )(
            ID3D12CompatibilityDevice * This);

        HRESULT ( STDMETHODCALLTYPE *CreateSharedResource )(
            ID3D12CompatibilityDevice * This,
            _In_  const D3D12_HEAP_PROPERTIES *pHeapProperties,
            D3D12_HEAP_FLAGS HeapFlags,
            _In_  const D3D12_RESOURCE_DESC *pDesc,
            D3D12_RESOURCE_STATES InitialResourceState,
            _In_opt_  const D3D12_CLEAR_VALUE *pOptimizedClearValue,
            _In_opt_  const D3D11_RESOURCE_FLAGS *pFlags11,
            D3D12_COMPATIBILITY_SHARED_FLAGS CompatibilityFlags,
            _In_opt_  ID3D12LifetimeTracker *pLifetimeTracker,
            _In_opt_  ID3D12SwapChainAssistant *pOwningSwapchain,
            REFIID riid,
            _COM_Outptr_opt_  void **ppResource);

        HRESULT ( STDMETHODCALLTYPE *CreateSharedHeap )(
            ID3D12CompatibilityDevice * This,
            _In_  const D3D12_HEAP_DESC *pHeapDesc,
            D3D12_COMPATIBILITY_SHARED_FLAGS CompatibilityFlags,
            REFIID riid,
            _COM_Outptr_opt_  void **ppHeap);

        HRESULT ( STDMETHODCALLTYPE *ReflectSharedProperties )(
            ID3D12CompatibilityDevice * This,
            _In_  ID3D12Object *pHeapOrResource,
            D3D12_REFLECT_SHARED_PROPERTY ReflectType,
            _Out_writes_bytes_(DataSize)  void *pData,
            UINT DataSize);

        END_INTERFACE
    } ID3D12CompatibilityDeviceVtbl;

    interface ID3D12CompatibilityDevice
    {
        CONST_VTBL struct ID3D12CompatibilityDeviceVtbl *lpVtbl;
    };



#ifdef COBJMACROS

/* Convenience wrappers for C callers: each forwards through This->lpVtbl. */

#define ID3D12CompatibilityDevice_QueryInterface(This,riid,ppvObject)	\
    ( (This)->lpVtbl -> QueryInterface(This,riid,ppvObject) )

#define ID3D12CompatibilityDevice_AddRef(This)	\
    ( (This)->lpVtbl -> AddRef(This) )

#define ID3D12CompatibilityDevice_Release(This)	\
    ( (This)->lpVtbl -> Release(This) )


#define ID3D12CompatibilityDevice_CreateSharedResource(This,pHeapProperties,HeapFlags,pDesc,InitialResourceState,pOptimizedClearValue,pFlags11,CompatibilityFlags,pLifetimeTracker,pOwningSwapchain,riid,ppResource)	\
    ( (This)->lpVtbl -> CreateSharedResource(This,pHeapProperties,HeapFlags,pDesc,InitialResourceState,pOptimizedClearValue,pFlags11,CompatibilityFlags,pLifetimeTracker,pOwningSwapchain,riid,ppResource) )

#define ID3D12CompatibilityDevice_CreateSharedHeap(This,pHeapDesc,CompatibilityFlags,riid,ppHeap)	\
    ( (This)->lpVtbl -> CreateSharedHeap(This,pHeapDesc,CompatibilityFlags,riid,ppHeap) )

#define ID3D12CompatibilityDevice_ReflectSharedProperties(This,pHeapOrResource,ReflectType,pData,DataSize)	\
    ( (This)->lpVtbl -> ReflectSharedProperties(This,pHeapOrResource,ReflectType,pData,DataSize) )

#endif /* COBJMACROS */


#endif 	/* C style interface */




#endif 	/* __ID3D12CompatibilityDevice_INTERFACE_DEFINED__ */


#ifndef __ID3D12CompatibilityQueue_INTERFACE_DEFINED__
#define __ID3D12CompatibilityQueue_INTERFACE_DEFINED__

/* interface ID3D12CompatibilityQueue */
/* [unique][local][object][uuid] */


EXTERN_C const IID IID_ID3D12CompatibilityQueue;

#if defined(__cplusplus) && !defined(CINTERFACE)

    /* C++ binding: keyed-mutex acquire / release, declared on top of IUnknown.
       Both methods carry a `_Reserved_` pointer and a `Reserved` value that is
       annotated `_In_range_(0,0)`, i.e. callers are expected to pass 0. */
    MIDL_INTERFACE("7974c836-9520-4cda-8d43-d996622e8926")
    ID3D12CompatibilityQueue : public IUnknown
    {
    public:
        virtual HRESULT STDMETHODCALLTYPE AcquireKeyedMutex(
            _In_  ID3D12Object *pHeapOrResourceWithKeyedMutex,
            UINT64 Key,
            DWORD dwTimeout,
            _Reserved_  void *pReserved,
            _In_range_(0,0)  UINT Reserved) = 0;

        virtual HRESULT STDMETHODCALLTYPE ReleaseKeyedMutex(
            _In_  ID3D12Object *pHeapOrResourceWithKeyedMutex,
            UINT64 Key,
            _Reserved_  void *pReserved,
            _In_range_(0,0)  UINT Reserved) = 0;

    };


#else 	/* C style interface */

    /* C binding: explicit vtable, same entry order as the C++ binding after the
       three IUnknown entries. */
    typedef struct ID3D12CompatibilityQueueVtbl
    {
        BEGIN_INTERFACE

        HRESULT ( STDMETHODCALLTYPE *QueryInterface )(
            ID3D12CompatibilityQueue * This,
            REFIID riid,
            _COM_Outptr_  void **ppvObject);

        ULONG ( STDMETHODCALLTYPE *AddRef )(
            ID3D12CompatibilityQueue * This);

        ULONG ( STDMETHODCALLTYPE *Release )(
            ID3D12CompatibilityQueue * This);

        HRESULT ( STDMETHODCALLTYPE *AcquireKeyedMutex )(
            ID3D12CompatibilityQueue * This,
            _In_  ID3D12Object *pHeapOrResourceWithKeyedMutex,
            UINT64 Key,
            DWORD dwTimeout,
            _Reserved_  void *pReserved,
            _In_range_(0,0)  UINT Reserved);

        HRESULT ( STDMETHODCALLTYPE *ReleaseKeyedMutex )(
            ID3D12CompatibilityQueue * This,
            _In_  ID3D12Object *pHeapOrResourceWithKeyedMutex,
            UINT64 Key,
            _Reserved_  void *pReserved,
            _In_range_(0,0)  UINT Reserved);

        END_INTERFACE
    } ID3D12CompatibilityQueueVtbl;

    interface ID3D12CompatibilityQueue
    {
        CONST_VTBL struct ID3D12CompatibilityQueueVtbl *lpVtbl;
    };



#ifdef COBJMACROS

/* Convenience wrappers for C callers: each forwards through This->lpVtbl. */

#define ID3D12CompatibilityQueue_QueryInterface(This,riid,ppvObject)	\
    ( (This)->lpVtbl -> QueryInterface(This,riid,ppvObject) )

#define ID3D12CompatibilityQueue_AddRef(This)	\
    ( (This)->lpVtbl -> AddRef(This) )

#define ID3D12CompatibilityQueue_Release(This)	\
    ( (This)->lpVtbl -> Release(This) )


#define ID3D12CompatibilityQueue_AcquireKeyedMutex(This,pHeapOrResourceWithKeyedMutex,Key,dwTimeout,pReserved,Reserved)	\
    ( (This)->lpVtbl -> AcquireKeyedMutex(This,pHeapOrResourceWithKeyedMutex,Key,dwTimeout,pReserved,Reserved) )

#define ID3D12CompatibilityQueue_ReleaseKeyedMutex(This,pHeapOrResourceWithKeyedMutex,Key,pReserved,Reserved)	\
    ( (This)->lpVtbl -> ReleaseKeyedMutex(This,pHeapOrResourceWithKeyedMutex,Key,pReserved,Reserved) )

#endif /* COBJMACROS */


#endif 	/* C style interface */




#endif 	/* __ID3D12CompatibilityQueue_INTERFACE_DEFINED__ */


/* interface __MIDL_itf_d3d12compatibility_0000_0002 */
/* [local] */

#endif /* WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) */
#pragma endregion
/* Interface IDs; the values match the MIDL_INTERFACE strings of the two interfaces. */
DEFINE_GUID(IID_ID3D12CompatibilityDevice,0x8f1c0e3c,0xfae3,0x4a82,0xb0,0x98,0xbf,0xe1,0x70,0x82,0x07,0xff);
DEFINE_GUID(IID_ID3D12CompatibilityQueue,0x7974c836,0x9520,0x4cda,0x8d,0x43,0xd9,0x96,0x62,0x2e,0x89,0x26);


extern RPC_IF_HANDLE __MIDL_itf_d3d12compatibility_0000_0002_v0_0_c_ifspec;
extern RPC_IF_HANDLE __MIDL_itf_d3d12compatibility_0000_0002_v0_0_s_ifspec;

/* Additional Prototypes for ALL interfaces */

/* end of Additional Prototypes */

#ifdef __cplusplus
}
#endif

#endif
================================================ FILE: external/d3dx12.h ================================================
//*********************************************************
//
// Copyright (c) Microsoft. All rights reserved.
// This code is licensed under the MIT License (MIT).
// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
// //********************************************************* #ifndef __D3DX12_H__ #define __D3DX12_H__ #include "d3d12.h" #if defined( __cplusplus ) struct CD3DX12_DEFAULT {}; extern const DECLSPEC_SELECTANY CD3DX12_DEFAULT D3D12_DEFAULT; //------------------------------------------------------------------------------------------------ inline bool operator==( const D3D12_VIEWPORT& l, const D3D12_VIEWPORT& r ) { return l.TopLeftX == r.TopLeftX && l.TopLeftY == r.TopLeftY && l.Width == r.Width && l.Height == r.Height && l.MinDepth == r.MinDepth && l.MaxDepth == r.MaxDepth; } //------------------------------------------------------------------------------------------------ inline bool operator!=( const D3D12_VIEWPORT& l, const D3D12_VIEWPORT& r ) { return !( l == r ); } //------------------------------------------------------------------------------------------------ struct CD3DX12_RECT : public D3D12_RECT { CD3DX12_RECT() = default; explicit CD3DX12_RECT( const D3D12_RECT& o ) : D3D12_RECT( o ) {} explicit CD3DX12_RECT( LONG Left, LONG Top, LONG Right, LONG Bottom ) { left = Left; top = Top; right = Right; bottom = Bottom; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_VIEWPORT : public D3D12_VIEWPORT { CD3DX12_VIEWPORT() = default; explicit CD3DX12_VIEWPORT( const D3D12_VIEWPORT& o ) : D3D12_VIEWPORT( o ) {} explicit CD3DX12_VIEWPORT( FLOAT topLeftX, FLOAT topLeftY, FLOAT width, FLOAT height, FLOAT minDepth = D3D12_MIN_DEPTH, FLOAT maxDepth = D3D12_MAX_DEPTH ) { TopLeftX = topLeftX; TopLeftY = topLeftY; Width = width; Height = height; MinDepth = minDepth; MaxDepth = maxDepth; } explicit CD3DX12_VIEWPORT( _In_ ID3D12Resource* pResource, UINT mipSlice = 0, FLOAT topLeftX = 0.0f, FLOAT topLeftY = 0.0f, FLOAT minDepth = D3D12_MIN_DEPTH, FLOAT maxDepth = D3D12_MAX_DEPTH ) { auto Desc = pResource->GetDesc(); const UINT64 SubresourceWidth = Desc.Width >> mipSlice; const UINT64 SubresourceHeight = 
Desc.Height >> mipSlice; switch (Desc.Dimension) { case D3D12_RESOURCE_DIMENSION_BUFFER: TopLeftX = topLeftX; TopLeftY = 0.0f; Width = Desc.Width - topLeftX; Height = 1.0f; break; case D3D12_RESOURCE_DIMENSION_TEXTURE1D: TopLeftX = topLeftX; TopLeftY = 0.0f; Width = (SubresourceWidth ? SubresourceWidth : 1.0f) - topLeftX; Height = 1.0f; break; case D3D12_RESOURCE_DIMENSION_TEXTURE2D: case D3D12_RESOURCE_DIMENSION_TEXTURE3D: TopLeftX = topLeftX; TopLeftY = topLeftY; Width = (SubresourceWidth ? SubresourceWidth : 1.0f) - topLeftX; Height = (SubresourceHeight ? SubresourceHeight: 1.0f) - topLeftY; break; default: break; } MinDepth = minDepth; MaxDepth = maxDepth; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_BOX : public D3D12_BOX { CD3DX12_BOX() = default; explicit CD3DX12_BOX( const D3D12_BOX& o ) : D3D12_BOX( o ) {} explicit CD3DX12_BOX( LONG Left, LONG Right ) { left = static_cast(Left); top = 0; front = 0; right = static_cast(Right); bottom = 1; back = 1; } explicit CD3DX12_BOX( LONG Left, LONG Top, LONG Right, LONG Bottom ) { left = static_cast(Left); top = static_cast(Top); front = 0; right = static_cast(Right); bottom = static_cast(Bottom); back = 1; } explicit CD3DX12_BOX( LONG Left, LONG Top, LONG Front, LONG Right, LONG Bottom, LONG Back ) { left = static_cast(Left); top = static_cast(Top); front = static_cast(Front); right = static_cast(Right); bottom = static_cast(Bottom); back = static_cast(Back); } }; inline bool operator==( const D3D12_BOX& l, const D3D12_BOX& r ) { return l.left == r.left && l.top == r.top && l.front == r.front && l.right == r.right && l.bottom == r.bottom && l.back == r.back; } inline bool operator!=( const D3D12_BOX& l, const D3D12_BOX& r ) { return !( l == r ); } //------------------------------------------------------------------------------------------------ struct CD3DX12_DEPTH_STENCIL_DESC : public D3D12_DEPTH_STENCIL_DESC { CD3DX12_DEPTH_STENCIL_DESC() = 
default; explicit CD3DX12_DEPTH_STENCIL_DESC( const D3D12_DEPTH_STENCIL_DESC& o ) : D3D12_DEPTH_STENCIL_DESC( o ) {} explicit CD3DX12_DEPTH_STENCIL_DESC( CD3DX12_DEFAULT ) { DepthEnable = TRUE; DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; DepthFunc = D3D12_COMPARISON_FUNC_LESS; StencilEnable = FALSE; StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK; StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK; const D3D12_DEPTH_STENCILOP_DESC defaultStencilOp = { D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_COMPARISON_FUNC_ALWAYS }; FrontFace = defaultStencilOp; BackFace = defaultStencilOp; } explicit CD3DX12_DEPTH_STENCIL_DESC( BOOL depthEnable, D3D12_DEPTH_WRITE_MASK depthWriteMask, D3D12_COMPARISON_FUNC depthFunc, BOOL stencilEnable, UINT8 stencilReadMask, UINT8 stencilWriteMask, D3D12_STENCIL_OP frontStencilFailOp, D3D12_STENCIL_OP frontStencilDepthFailOp, D3D12_STENCIL_OP frontStencilPassOp, D3D12_COMPARISON_FUNC frontStencilFunc, D3D12_STENCIL_OP backStencilFailOp, D3D12_STENCIL_OP backStencilDepthFailOp, D3D12_STENCIL_OP backStencilPassOp, D3D12_COMPARISON_FUNC backStencilFunc ) { DepthEnable = depthEnable; DepthWriteMask = depthWriteMask; DepthFunc = depthFunc; StencilEnable = stencilEnable; StencilReadMask = stencilReadMask; StencilWriteMask = stencilWriteMask; FrontFace.StencilFailOp = frontStencilFailOp; FrontFace.StencilDepthFailOp = frontStencilDepthFailOp; FrontFace.StencilPassOp = frontStencilPassOp; FrontFace.StencilFunc = frontStencilFunc; BackFace.StencilFailOp = backStencilFailOp; BackFace.StencilDepthFailOp = backStencilDepthFailOp; BackFace.StencilPassOp = backStencilPassOp; BackFace.StencilFunc = backStencilFunc; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_DEPTH_STENCIL_DESC1 : public D3D12_DEPTH_STENCIL_DESC1 { CD3DX12_DEPTH_STENCIL_DESC1() = default; explicit CD3DX12_DEPTH_STENCIL_DESC1( const D3D12_DEPTH_STENCIL_DESC1& o ) : 
D3D12_DEPTH_STENCIL_DESC1( o ) {} explicit CD3DX12_DEPTH_STENCIL_DESC1( const D3D12_DEPTH_STENCIL_DESC& o ) { DepthEnable = o.DepthEnable; DepthWriteMask = o.DepthWriteMask; DepthFunc = o.DepthFunc; StencilEnable = o.StencilEnable; StencilReadMask = o.StencilReadMask; StencilWriteMask = o.StencilWriteMask; FrontFace.StencilFailOp = o.FrontFace.StencilFailOp; FrontFace.StencilDepthFailOp = o.FrontFace.StencilDepthFailOp; FrontFace.StencilPassOp = o.FrontFace.StencilPassOp; FrontFace.StencilFunc = o.FrontFace.StencilFunc; BackFace.StencilFailOp = o.BackFace.StencilFailOp; BackFace.StencilDepthFailOp = o.BackFace.StencilDepthFailOp; BackFace.StencilPassOp = o.BackFace.StencilPassOp; BackFace.StencilFunc = o.BackFace.StencilFunc; DepthBoundsTestEnable = FALSE; } explicit CD3DX12_DEPTH_STENCIL_DESC1( CD3DX12_DEFAULT ) { DepthEnable = TRUE; DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; DepthFunc = D3D12_COMPARISON_FUNC_LESS; StencilEnable = FALSE; StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK; StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK; const D3D12_DEPTH_STENCILOP_DESC defaultStencilOp = { D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_STENCIL_OP_KEEP, D3D12_COMPARISON_FUNC_ALWAYS }; FrontFace = defaultStencilOp; BackFace = defaultStencilOp; DepthBoundsTestEnable = FALSE; } explicit CD3DX12_DEPTH_STENCIL_DESC1( BOOL depthEnable, D3D12_DEPTH_WRITE_MASK depthWriteMask, D3D12_COMPARISON_FUNC depthFunc, BOOL stencilEnable, UINT8 stencilReadMask, UINT8 stencilWriteMask, D3D12_STENCIL_OP frontStencilFailOp, D3D12_STENCIL_OP frontStencilDepthFailOp, D3D12_STENCIL_OP frontStencilPassOp, D3D12_COMPARISON_FUNC frontStencilFunc, D3D12_STENCIL_OP backStencilFailOp, D3D12_STENCIL_OP backStencilDepthFailOp, D3D12_STENCIL_OP backStencilPassOp, D3D12_COMPARISON_FUNC backStencilFunc, BOOL depthBoundsTestEnable ) { DepthEnable = depthEnable; DepthWriteMask = depthWriteMask; DepthFunc = depthFunc; StencilEnable = stencilEnable; StencilReadMask = stencilReadMask; 
StencilWriteMask = stencilWriteMask; FrontFace.StencilFailOp = frontStencilFailOp; FrontFace.StencilDepthFailOp = frontStencilDepthFailOp; FrontFace.StencilPassOp = frontStencilPassOp; FrontFace.StencilFunc = frontStencilFunc; BackFace.StencilFailOp = backStencilFailOp; BackFace.StencilDepthFailOp = backStencilDepthFailOp; BackFace.StencilPassOp = backStencilPassOp; BackFace.StencilFunc = backStencilFunc; DepthBoundsTestEnable = depthBoundsTestEnable; } operator D3D12_DEPTH_STENCIL_DESC() const { D3D12_DEPTH_STENCIL_DESC D; D.DepthEnable = DepthEnable; D.DepthWriteMask = DepthWriteMask; D.DepthFunc = DepthFunc; D.StencilEnable = StencilEnable; D.StencilReadMask = StencilReadMask; D.StencilWriteMask = StencilWriteMask; D.FrontFace.StencilFailOp = FrontFace.StencilFailOp; D.FrontFace.StencilDepthFailOp = FrontFace.StencilDepthFailOp; D.FrontFace.StencilPassOp = FrontFace.StencilPassOp; D.FrontFace.StencilFunc = FrontFace.StencilFunc; D.BackFace.StencilFailOp = BackFace.StencilFailOp; D.BackFace.StencilDepthFailOp = BackFace.StencilDepthFailOp; D.BackFace.StencilPassOp = BackFace.StencilPassOp; D.BackFace.StencilFunc = BackFace.StencilFunc; return D; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_BLEND_DESC : public D3D12_BLEND_DESC { CD3DX12_BLEND_DESC() = default; explicit CD3DX12_BLEND_DESC( const D3D12_BLEND_DESC& o ) : D3D12_BLEND_DESC( o ) {} explicit CD3DX12_BLEND_DESC( CD3DX12_DEFAULT ) { AlphaToCoverageEnable = FALSE; IndependentBlendEnable = FALSE; const D3D12_RENDER_TARGET_BLEND_DESC defaultRenderTargetBlendDesc = { FALSE,FALSE, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_BLEND_ONE, D3D12_BLEND_ZERO, D3D12_BLEND_OP_ADD, D3D12_LOGIC_OP_NOOP, D3D12_COLOR_WRITE_ENABLE_ALL, }; for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) RenderTarget[ i ] = defaultRenderTargetBlendDesc; } }; 
//------------------------------------------------------------------------------------------------ struct CD3DX12_RASTERIZER_DESC : public D3D12_RASTERIZER_DESC { CD3DX12_RASTERIZER_DESC() = default; explicit CD3DX12_RASTERIZER_DESC( const D3D12_RASTERIZER_DESC& o ) : D3D12_RASTERIZER_DESC( o ) {} explicit CD3DX12_RASTERIZER_DESC( CD3DX12_DEFAULT ) { FillMode = D3D12_FILL_MODE_SOLID; CullMode = D3D12_CULL_MODE_BACK; FrontCounterClockwise = FALSE; DepthBias = D3D12_DEFAULT_DEPTH_BIAS; DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; DepthClipEnable = TRUE; MultisampleEnable = FALSE; AntialiasedLineEnable = FALSE; ForcedSampleCount = 0; ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; } explicit CD3DX12_RASTERIZER_DESC( D3D12_FILL_MODE fillMode, D3D12_CULL_MODE cullMode, BOOL frontCounterClockwise, INT depthBias, FLOAT depthBiasClamp, FLOAT slopeScaledDepthBias, BOOL depthClipEnable, BOOL multisampleEnable, BOOL antialiasedLineEnable, UINT forcedSampleCount, D3D12_CONSERVATIVE_RASTERIZATION_MODE conservativeRaster) { FillMode = fillMode; CullMode = cullMode; FrontCounterClockwise = frontCounterClockwise; DepthBias = depthBias; DepthBiasClamp = depthBiasClamp; SlopeScaledDepthBias = slopeScaledDepthBias; DepthClipEnable = depthClipEnable; MultisampleEnable = multisampleEnable; AntialiasedLineEnable = antialiasedLineEnable; ForcedSampleCount = forcedSampleCount; ConservativeRaster = conservativeRaster; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_RESOURCE_ALLOCATION_INFO : public D3D12_RESOURCE_ALLOCATION_INFO { CD3DX12_RESOURCE_ALLOCATION_INFO() = default; explicit CD3DX12_RESOURCE_ALLOCATION_INFO( const D3D12_RESOURCE_ALLOCATION_INFO& o ) : D3D12_RESOURCE_ALLOCATION_INFO( o ) {} CD3DX12_RESOURCE_ALLOCATION_INFO( UINT64 size, UINT64 alignment ) { SizeInBytes = size; Alignment = alignment; } }; 
//------------------------------------------------------------------------------------------------ struct CD3DX12_HEAP_PROPERTIES : public D3D12_HEAP_PROPERTIES { CD3DX12_HEAP_PROPERTIES() = default; explicit CD3DX12_HEAP_PROPERTIES(const D3D12_HEAP_PROPERTIES &o) : D3D12_HEAP_PROPERTIES(o) {} CD3DX12_HEAP_PROPERTIES( D3D12_CPU_PAGE_PROPERTY cpuPageProperty, D3D12_MEMORY_POOL memoryPoolPreference, UINT creationNodeMask = 1, UINT nodeMask = 1 ) { Type = D3D12_HEAP_TYPE_CUSTOM; CPUPageProperty = cpuPageProperty; MemoryPoolPreference = memoryPoolPreference; CreationNodeMask = creationNodeMask; VisibleNodeMask = nodeMask; } explicit CD3DX12_HEAP_PROPERTIES( D3D12_HEAP_TYPE type, UINT creationNodeMask = 1, UINT nodeMask = 1 ) { Type = type; CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; CreationNodeMask = creationNodeMask; VisibleNodeMask = nodeMask; } bool IsCPUAccessible() const { return Type == D3D12_HEAP_TYPE_UPLOAD || Type == D3D12_HEAP_TYPE_READBACK || (Type == D3D12_HEAP_TYPE_CUSTOM && (CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE || CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_WRITE_BACK)); } }; inline bool operator==( const D3D12_HEAP_PROPERTIES& l, const D3D12_HEAP_PROPERTIES& r ) { return l.Type == r.Type && l.CPUPageProperty == r.CPUPageProperty && l.MemoryPoolPreference == r.MemoryPoolPreference && l.CreationNodeMask == r.CreationNodeMask && l.VisibleNodeMask == r.VisibleNodeMask; } inline bool operator!=( const D3D12_HEAP_PROPERTIES& l, const D3D12_HEAP_PROPERTIES& r ) { return !( l == r ); } //------------------------------------------------------------------------------------------------ struct CD3DX12_HEAP_DESC : public D3D12_HEAP_DESC { CD3DX12_HEAP_DESC() = default; explicit CD3DX12_HEAP_DESC(const D3D12_HEAP_DESC &o) : D3D12_HEAP_DESC(o) {} CD3DX12_HEAP_DESC( UINT64 size, D3D12_HEAP_PROPERTIES properties, UINT64 alignment = 0, D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) { SizeInBytes 
= size; Properties = properties; Alignment = alignment; Flags = flags; } CD3DX12_HEAP_DESC( UINT64 size, D3D12_HEAP_TYPE type, UINT64 alignment = 0, D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) { SizeInBytes = size; Properties = CD3DX12_HEAP_PROPERTIES( type ); Alignment = alignment; Flags = flags; } CD3DX12_HEAP_DESC( UINT64 size, D3D12_CPU_PAGE_PROPERTY cpuPageProperty, D3D12_MEMORY_POOL memoryPoolPreference, UINT64 alignment = 0, D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) { SizeInBytes = size; Properties = CD3DX12_HEAP_PROPERTIES( cpuPageProperty, memoryPoolPreference ); Alignment = alignment; Flags = flags; } CD3DX12_HEAP_DESC( const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, D3D12_HEAP_PROPERTIES properties, D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) { SizeInBytes = resAllocInfo.SizeInBytes; Properties = properties; Alignment = resAllocInfo.Alignment; Flags = flags; } CD3DX12_HEAP_DESC( const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) { SizeInBytes = resAllocInfo.SizeInBytes; Properties = CD3DX12_HEAP_PROPERTIES( type ); Alignment = resAllocInfo.Alignment; Flags = flags; } CD3DX12_HEAP_DESC( const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, D3D12_CPU_PAGE_PROPERTY cpuPageProperty, D3D12_MEMORY_POOL memoryPoolPreference, D3D12_HEAP_FLAGS flags = D3D12_HEAP_FLAG_NONE ) { SizeInBytes = resAllocInfo.SizeInBytes; Properties = CD3DX12_HEAP_PROPERTIES( cpuPageProperty, memoryPoolPreference ); Alignment = resAllocInfo.Alignment; Flags = flags; } bool IsCPUAccessible() const { return static_cast< const CD3DX12_HEAP_PROPERTIES* >( &Properties )->IsCPUAccessible(); } }; inline bool operator==( const D3D12_HEAP_DESC& l, const D3D12_HEAP_DESC& r ) { return l.SizeInBytes == r.SizeInBytes && l.Properties == r.Properties && l.Alignment == r.Alignment && l.Flags == r.Flags; } inline bool operator!=( const D3D12_HEAP_DESC& l, const D3D12_HEAP_DESC& r ) { return !( l == r ); } 
//------------------------------------------------------------------------------------------------ struct CD3DX12_CLEAR_VALUE : public D3D12_CLEAR_VALUE { CD3DX12_CLEAR_VALUE() = default; explicit CD3DX12_CLEAR_VALUE(const D3D12_CLEAR_VALUE &o) : D3D12_CLEAR_VALUE(o) {} CD3DX12_CLEAR_VALUE( DXGI_FORMAT format, const FLOAT color[4] ) { Format = format; memcpy( Color, color, sizeof( Color ) ); } CD3DX12_CLEAR_VALUE( DXGI_FORMAT format, FLOAT depth, UINT8 stencil ) { Format = format; /* Use memcpy to preserve NAN values */ memcpy( &DepthStencil.Depth, &depth, sizeof( depth ) ); DepthStencil.Stencil = stencil; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_RANGE : public D3D12_RANGE { CD3DX12_RANGE() = default; explicit CD3DX12_RANGE(const D3D12_RANGE &o) : D3D12_RANGE(o) {} CD3DX12_RANGE( SIZE_T begin, SIZE_T end ) { Begin = begin; End = end; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_RANGE_UINT64 : public D3D12_RANGE_UINT64 { CD3DX12_RANGE_UINT64() = default; explicit CD3DX12_RANGE_UINT64(const D3D12_RANGE_UINT64 &o) : D3D12_RANGE_UINT64(o) {} CD3DX12_RANGE_UINT64( UINT64 begin, UINT64 end ) { Begin = begin; End = end; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_SUBRESOURCE_RANGE_UINT64 : public D3D12_SUBRESOURCE_RANGE_UINT64 { CD3DX12_SUBRESOURCE_RANGE_UINT64() = default; explicit CD3DX12_SUBRESOURCE_RANGE_UINT64(const D3D12_SUBRESOURCE_RANGE_UINT64 &o) : D3D12_SUBRESOURCE_RANGE_UINT64(o) {} CD3DX12_SUBRESOURCE_RANGE_UINT64( UINT subresource, const D3D12_RANGE_UINT64& range ) { Subresource = subresource; Range = range; } CD3DX12_SUBRESOURCE_RANGE_UINT64( UINT subresource, UINT64 begin, UINT64 end ) { Subresource = subresource; Range.Begin = begin; Range.End = end; } }; 
//------------------------------------------------------------------------------------------------ struct CD3DX12_SHADER_BYTECODE : public D3D12_SHADER_BYTECODE { CD3DX12_SHADER_BYTECODE() = default; explicit CD3DX12_SHADER_BYTECODE(const D3D12_SHADER_BYTECODE &o) : D3D12_SHADER_BYTECODE(o) {} CD3DX12_SHADER_BYTECODE( _In_ ID3DBlob* pShaderBlob ) { pShaderBytecode = pShaderBlob->GetBufferPointer(); BytecodeLength = pShaderBlob->GetBufferSize(); } CD3DX12_SHADER_BYTECODE( const void* _pShaderBytecode, SIZE_T bytecodeLength ) { pShaderBytecode = _pShaderBytecode; BytecodeLength = bytecodeLength; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_TILED_RESOURCE_COORDINATE : public D3D12_TILED_RESOURCE_COORDINATE { CD3DX12_TILED_RESOURCE_COORDINATE() = default; explicit CD3DX12_TILED_RESOURCE_COORDINATE(const D3D12_TILED_RESOURCE_COORDINATE &o) : D3D12_TILED_RESOURCE_COORDINATE(o) {} CD3DX12_TILED_RESOURCE_COORDINATE( UINT x, UINT y, UINT z, UINT subresource ) { X = x; Y = y; Z = z; Subresource = subresource; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_TILE_REGION_SIZE : public D3D12_TILE_REGION_SIZE { CD3DX12_TILE_REGION_SIZE() = default; explicit CD3DX12_TILE_REGION_SIZE(const D3D12_TILE_REGION_SIZE &o) : D3D12_TILE_REGION_SIZE(o) {} CD3DX12_TILE_REGION_SIZE( UINT numTiles, BOOL useBox, UINT width, UINT16 height, UINT16 depth ) { NumTiles = numTiles; UseBox = useBox; Width = width; Height = height; Depth = depth; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_SUBRESOURCE_TILING : public D3D12_SUBRESOURCE_TILING { CD3DX12_SUBRESOURCE_TILING() = default; explicit CD3DX12_SUBRESOURCE_TILING(const D3D12_SUBRESOURCE_TILING &o) : D3D12_SUBRESOURCE_TILING(o) {} CD3DX12_SUBRESOURCE_TILING( UINT widthInTiles, UINT16 heightInTiles, UINT16 depthInTiles, UINT 
startTileIndexInOverallResource ) { WidthInTiles = widthInTiles; HeightInTiles = heightInTiles; DepthInTiles = depthInTiles; StartTileIndexInOverallResource = startTileIndexInOverallResource; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_TILE_SHAPE : public D3D12_TILE_SHAPE { CD3DX12_TILE_SHAPE() = default; explicit CD3DX12_TILE_SHAPE(const D3D12_TILE_SHAPE &o) : D3D12_TILE_SHAPE(o) {} CD3DX12_TILE_SHAPE( UINT widthInTexels, UINT heightInTexels, UINT depthInTexels ) { WidthInTexels = widthInTexels; HeightInTexels = heightInTexels; DepthInTexels = depthInTexels; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_RESOURCE_BARRIER : public D3D12_RESOURCE_BARRIER { CD3DX12_RESOURCE_BARRIER() = default; explicit CD3DX12_RESOURCE_BARRIER(const D3D12_RESOURCE_BARRIER &o) : D3D12_RESOURCE_BARRIER(o) {} static inline CD3DX12_RESOURCE_BARRIER Transition( _In_ ID3D12Resource* pResource, D3D12_RESOURCE_STATES stateBefore, D3D12_RESOURCE_STATES stateAfter, UINT subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, D3D12_RESOURCE_BARRIER_FLAGS flags = D3D12_RESOURCE_BARRIER_FLAG_NONE) { CD3DX12_RESOURCE_BARRIER result = {}; D3D12_RESOURCE_BARRIER &barrier = result; result.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; result.Flags = flags; barrier.Transition.pResource = pResource; barrier.Transition.StateBefore = stateBefore; barrier.Transition.StateAfter = stateAfter; barrier.Transition.Subresource = subresource; return result; } static inline CD3DX12_RESOURCE_BARRIER Aliasing( _In_ ID3D12Resource* pResourceBefore, _In_ ID3D12Resource* pResourceAfter) { CD3DX12_RESOURCE_BARRIER result = {}; D3D12_RESOURCE_BARRIER &barrier = result; result.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING; barrier.Aliasing.pResourceBefore = pResourceBefore; barrier.Aliasing.pResourceAfter = pResourceAfter; return result; } static inline CD3DX12_RESOURCE_BARRIER 
UAV( _In_ ID3D12Resource* pResource) { CD3DX12_RESOURCE_BARRIER result = {}; D3D12_RESOURCE_BARRIER &barrier = result; result.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; barrier.UAV.pResource = pResource; return result; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_PACKED_MIP_INFO : public D3D12_PACKED_MIP_INFO { CD3DX12_PACKED_MIP_INFO() = default; explicit CD3DX12_PACKED_MIP_INFO(const D3D12_PACKED_MIP_INFO &o) : D3D12_PACKED_MIP_INFO(o) {} CD3DX12_PACKED_MIP_INFO( UINT8 numStandardMips, UINT8 numPackedMips, UINT numTilesForPackedMips, UINT startTileIndexInOverallResource ) { NumStandardMips = numStandardMips; NumPackedMips = numPackedMips; NumTilesForPackedMips = numTilesForPackedMips; StartTileIndexInOverallResource = startTileIndexInOverallResource; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_SUBRESOURCE_FOOTPRINT : public D3D12_SUBRESOURCE_FOOTPRINT { CD3DX12_SUBRESOURCE_FOOTPRINT() = default; explicit CD3DX12_SUBRESOURCE_FOOTPRINT(const D3D12_SUBRESOURCE_FOOTPRINT &o) : D3D12_SUBRESOURCE_FOOTPRINT(o) {} CD3DX12_SUBRESOURCE_FOOTPRINT( DXGI_FORMAT format, UINT width, UINT height, UINT depth, UINT rowPitch ) { Format = format; Width = width; Height = height; Depth = depth; RowPitch = rowPitch; } explicit CD3DX12_SUBRESOURCE_FOOTPRINT( const D3D12_RESOURCE_DESC& resDesc, UINT rowPitch ) { Format = resDesc.Format; Width = UINT( resDesc.Width ); Height = resDesc.Height; Depth = (resDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? 
resDesc.DepthOrArraySize : 1); RowPitch = rowPitch; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_TEXTURE_COPY_LOCATION : public D3D12_TEXTURE_COPY_LOCATION { CD3DX12_TEXTURE_COPY_LOCATION() = default; explicit CD3DX12_TEXTURE_COPY_LOCATION(const D3D12_TEXTURE_COPY_LOCATION &o) : D3D12_TEXTURE_COPY_LOCATION(o) {} CD3DX12_TEXTURE_COPY_LOCATION(_In_ ID3D12Resource* pRes) { pResource = pRes; Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; PlacedFootprint = {}; } CD3DX12_TEXTURE_COPY_LOCATION(_In_ ID3D12Resource* pRes, D3D12_PLACED_SUBRESOURCE_FOOTPRINT const& Footprint) { pResource = pRes; Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; PlacedFootprint = Footprint; } CD3DX12_TEXTURE_COPY_LOCATION(_In_ ID3D12Resource* pRes, UINT Sub) { pResource = pRes; Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; SubresourceIndex = Sub; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_DESCRIPTOR_RANGE : public D3D12_DESCRIPTOR_RANGE { CD3DX12_DESCRIPTOR_RANGE() = default; explicit CD3DX12_DESCRIPTOR_RANGE(const D3D12_DESCRIPTOR_RANGE &o) : D3D12_DESCRIPTOR_RANGE(o) {} CD3DX12_DESCRIPTOR_RANGE( D3D12_DESCRIPTOR_RANGE_TYPE rangeType, UINT numDescriptors, UINT baseShaderRegister, UINT registerSpace = 0, UINT offsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) { Init(rangeType, numDescriptors, baseShaderRegister, registerSpace, offsetInDescriptorsFromTableStart); } inline void Init( D3D12_DESCRIPTOR_RANGE_TYPE rangeType, UINT numDescriptors, UINT baseShaderRegister, UINT registerSpace = 0, UINT offsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) { Init(*this, rangeType, numDescriptors, baseShaderRegister, registerSpace, offsetInDescriptorsFromTableStart); } static inline void Init( _Out_ D3D12_DESCRIPTOR_RANGE &range, D3D12_DESCRIPTOR_RANGE_TYPE rangeType, UINT numDescriptors, UINT 
baseShaderRegister, UINT registerSpace = 0, UINT offsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND) { range.RangeType = rangeType; range.NumDescriptors = numDescriptors; range.BaseShaderRegister = baseShaderRegister; range.RegisterSpace = registerSpace; range.OffsetInDescriptorsFromTableStart = offsetInDescriptorsFromTableStart; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_ROOT_DESCRIPTOR_TABLE : public D3D12_ROOT_DESCRIPTOR_TABLE { CD3DX12_ROOT_DESCRIPTOR_TABLE() = default; explicit CD3DX12_ROOT_DESCRIPTOR_TABLE(const D3D12_ROOT_DESCRIPTOR_TABLE &o) : D3D12_ROOT_DESCRIPTOR_TABLE(o) {} CD3DX12_ROOT_DESCRIPTOR_TABLE( UINT numDescriptorRanges, _In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* _pDescriptorRanges) { Init(numDescriptorRanges, _pDescriptorRanges); } inline void Init( UINT numDescriptorRanges, _In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* _pDescriptorRanges) { Init(*this, numDescriptorRanges, _pDescriptorRanges); } static inline void Init( _Out_ D3D12_ROOT_DESCRIPTOR_TABLE &rootDescriptorTable, UINT numDescriptorRanges, _In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* _pDescriptorRanges) { rootDescriptorTable.NumDescriptorRanges = numDescriptorRanges; rootDescriptorTable.pDescriptorRanges = _pDescriptorRanges; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_ROOT_CONSTANTS : public D3D12_ROOT_CONSTANTS { CD3DX12_ROOT_CONSTANTS() = default; explicit CD3DX12_ROOT_CONSTANTS(const D3D12_ROOT_CONSTANTS &o) : D3D12_ROOT_CONSTANTS(o) {} CD3DX12_ROOT_CONSTANTS( UINT num32BitValues, UINT shaderRegister, UINT registerSpace = 0) { Init(num32BitValues, shaderRegister, registerSpace); } inline void Init( UINT num32BitValues, UINT shaderRegister, UINT registerSpace = 0) { Init(*this, num32BitValues, shaderRegister, registerSpace); } static inline void 
Init( _Out_ D3D12_ROOT_CONSTANTS &rootConstants, UINT num32BitValues, UINT shaderRegister, UINT registerSpace = 0) { rootConstants.Num32BitValues = num32BitValues; rootConstants.ShaderRegister = shaderRegister; rootConstants.RegisterSpace = registerSpace; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_ROOT_DESCRIPTOR : public D3D12_ROOT_DESCRIPTOR { CD3DX12_ROOT_DESCRIPTOR() = default; explicit CD3DX12_ROOT_DESCRIPTOR(const D3D12_ROOT_DESCRIPTOR &o) : D3D12_ROOT_DESCRIPTOR(o) {} CD3DX12_ROOT_DESCRIPTOR( UINT shaderRegister, UINT registerSpace = 0) { Init(shaderRegister, registerSpace); } inline void Init( UINT shaderRegister, UINT registerSpace = 0) { Init(*this, shaderRegister, registerSpace); } static inline void Init(_Out_ D3D12_ROOT_DESCRIPTOR &table, UINT shaderRegister, UINT registerSpace = 0) { table.ShaderRegister = shaderRegister; table.RegisterSpace = registerSpace; } }; //------------------------------------------------------------------------------------------------ struct CD3DX12_ROOT_PARAMETER : public D3D12_ROOT_PARAMETER { CD3DX12_ROOT_PARAMETER() = default; explicit CD3DX12_ROOT_PARAMETER(const D3D12_ROOT_PARAMETER &o) : D3D12_ROOT_PARAMETER(o) {} static inline void InitAsDescriptorTable( _Out_ D3D12_ROOT_PARAMETER &rootParam, UINT numDescriptorRanges, _In_reads_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* pDescriptorRanges, D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) { rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParam.ShaderVisibility = visibility; CD3DX12_ROOT_DESCRIPTOR_TABLE::Init(rootParam.DescriptorTable, numDescriptorRanges, pDescriptorRanges); } static inline void InitAsConstants( _Out_ D3D12_ROOT_PARAMETER &rootParam, UINT num32BitValues, UINT shaderRegister, UINT registerSpace = 0, D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL) { rootParam.ParameterType = 
D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; // completes an assignment that begins before this span
        rootParam.ShaderVisibility = visibility;
        CD3DX12_ROOT_CONSTANTS::Init(rootParam.Constants, num32BitValues, shaderRegister, registerSpace);
    }

    // Fills rootParam as a root constant-buffer view: sets the parameter type to CBV, stores the
    // visibility, and lets CD3DX12_ROOT_DESCRIPTOR::Init write the register / register space.
    static inline void InitAsConstantBufferView(
        _Out_ D3D12_ROOT_PARAMETER &rootParam,
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
        rootParam.ShaderVisibility = visibility;
        CD3DX12_ROOT_DESCRIPTOR::Init(rootParam.Descriptor, shaderRegister, registerSpace);
    }

    // Same as InitAsConstantBufferView, but tags the parameter as a root SRV.
    static inline void InitAsShaderResourceView(
        _Out_ D3D12_ROOT_PARAMETER &rootParam,
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
        rootParam.ShaderVisibility = visibility;
        CD3DX12_ROOT_DESCRIPTOR::Init(rootParam.Descriptor, shaderRegister, registerSpace);
    }

    // Same as InitAsConstantBufferView, but tags the parameter as a root UAV.
    static inline void InitAsUnorderedAccessView(
        _Out_ D3D12_ROOT_PARAMETER &rootParam,
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
        rootParam.ShaderVisibility = visibility;
        CD3DX12_ROOT_DESCRIPTOR::Init(rootParam.Descriptor, shaderRegister, registerSpace);
    }

    // Member overloads: each one forwards to the static overload of the same name with *this
    // as the parameter being initialized.
    inline void InitAsDescriptorTable(
        UINT numDescriptorRanges,
        _In_reads_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE* pDescriptorRanges,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        InitAsDescriptorTable(*this, numDescriptorRanges, pDescriptorRanges, visibility);
    }

    inline void InitAsConstants(
        UINT num32BitValues,
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        InitAsConstants(*this, num32BitValues, shaderRegister, registerSpace, visibility);
    }

    inline void InitAsConstantBufferView(
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        InitAsConstantBufferView(*this, shaderRegister, registerSpace, visibility);
    }

    inline void InitAsShaderResourceView(
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        InitAsShaderResourceView(*this, shaderRegister, registerSpace, visibility);
    }

    inline void InitAsUnorderedAccessView(
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        InitAsUnorderedAccessView(*this, shaderRegister, registerSpace, visibility);
    }
}; // end of the struct opened before this span (presumably CD3DX12_ROOT_PARAMETER - confirm above)

//------------------------------------------------------------------------------------------------
// Helper derived from D3D12_STATIC_SAMPLER_DESC. It adds constructors and Init overloads that
// write all thirteen fields in one call; every argument except shaderRegister has a default.
struct CD3DX12_STATIC_SAMPLER_DESC : public D3D12_STATIC_SAMPLER_DESC
{
    CD3DX12_STATIC_SAMPLER_DESC() = default;
    explicit CD3DX12_STATIC_SAMPLER_DESC(const D3D12_STATIC_SAMPLER_DESC &o) :
        D3D12_STATIC_SAMPLER_DESC(o)
    {}
    // Field-wise constructor; forwards everything to the member Init below.
    CD3DX12_STATIC_SAMPLER_DESC(
        UINT shaderRegister,
        D3D12_FILTER filter = D3D12_FILTER_ANISOTROPIC,
        D3D12_TEXTURE_ADDRESS_MODE addressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP,
        D3D12_TEXTURE_ADDRESS_MODE addressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP,
        D3D12_TEXTURE_ADDRESS_MODE addressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP,
        FLOAT mipLODBias = 0,
        UINT maxAnisotropy = 16,
        D3D12_COMPARISON_FUNC comparisonFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL,
        D3D12_STATIC_BORDER_COLOR borderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE,
        FLOAT minLOD = 0.f,
        FLOAT maxLOD = D3D12_FLOAT32_MAX,
        D3D12_SHADER_VISIBILITY shaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
        UINT registerSpace = 0)
    {
        Init(
            shaderRegister,
            filter,
            addressU,
            addressV,
            addressW,
            mipLODBias,
            maxAnisotropy,
            comparisonFunc,
            borderColor,
            minLOD,
            maxLOD,
            shaderVisibility,
            registerSpace);
    }

    // Static form: copies each argument into the matching field of samplerDesc.
    static inline void Init(
        _Out_ D3D12_STATIC_SAMPLER_DESC &samplerDesc,
        UINT shaderRegister,
        D3D12_FILTER filter = D3D12_FILTER_ANISOTROPIC,
        D3D12_TEXTURE_ADDRESS_MODE addressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP,
        D3D12_TEXTURE_ADDRESS_MODE addressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP,
        D3D12_TEXTURE_ADDRESS_MODE addressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP,
        FLOAT mipLODBias = 0,
        UINT maxAnisotropy = 16,
        D3D12_COMPARISON_FUNC comparisonFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL,
        D3D12_STATIC_BORDER_COLOR borderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE,
        FLOAT minLOD = 0.f,
        FLOAT maxLOD = D3D12_FLOAT32_MAX,
        D3D12_SHADER_VISIBILITY shaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
        UINT registerSpace = 0)
    {
        samplerDesc.ShaderRegister = shaderRegister;
        samplerDesc.Filter = filter;
        samplerDesc.AddressU = addressU;
        samplerDesc.AddressV = addressV;
        samplerDesc.AddressW = addressW;
        samplerDesc.MipLODBias = mipLODBias;
        samplerDesc.MaxAnisotropy = maxAnisotropy;
        samplerDesc.ComparisonFunc = comparisonFunc;
        samplerDesc.BorderColor = borderColor;
        samplerDesc.MinLOD = minLOD;
        samplerDesc.MaxLOD = maxLOD;
        samplerDesc.ShaderVisibility = shaderVisibility;
        samplerDesc.RegisterSpace = registerSpace;
    }

    // Member form: forwards to the static Init with *this as the target.
    inline void Init(
        UINT shaderRegister,
        D3D12_FILTER filter = D3D12_FILTER_ANISOTROPIC,
        D3D12_TEXTURE_ADDRESS_MODE addressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP,
        D3D12_TEXTURE_ADDRESS_MODE addressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP,
        D3D12_TEXTURE_ADDRESS_MODE addressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP,
        FLOAT mipLODBias = 0,
        UINT maxAnisotropy = 16,
        D3D12_COMPARISON_FUNC comparisonFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL,
        D3D12_STATIC_BORDER_COLOR borderColor = D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE,
        FLOAT minLOD = 0.f,
        FLOAT maxLOD = D3D12_FLOAT32_MAX,
        D3D12_SHADER_VISIBILITY shaderVisibility = D3D12_SHADER_VISIBILITY_ALL,
        UINT registerSpace = 0)
    {
        Init(
            *this,
            shaderRegister,
            filter,
            addressU,
            addressV,
            addressW,
            mipLODBias,
            maxAnisotropy,
            comparisonFunc,
            borderColor,
            minLOD,
            maxLOD,
            shaderVisibility,
            registerSpace);
    }
};

//------------------------------------------------------------------------------------------------
// Helper derived from D3D12_ROOT_SIGNATURE_DESC. The descriptor only stores the parameter and
// static-sampler pointers it is given; nothing here copies the arrays they point to.
struct CD3DX12_ROOT_SIGNATURE_DESC : public D3D12_ROOT_SIGNATURE_DESC
{
    CD3DX12_ROOT_SIGNATURE_DESC() = default;
    explicit CD3DX12_ROOT_SIGNATURE_DESC(const D3D12_ROOT_SIGNATURE_DESC &o) :
        D3D12_ROOT_SIGNATURE_DESC(o)
    {}
    CD3DX12_ROOT_SIGNATURE_DESC(
        UINT numParameters,
        _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters,
        UINT numStaticSamplers = 0,
        _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr,
        D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE)
    {
        Init(numParameters, _pParameters, numStaticSamplers, _pStaticSamplers, flags);
    }
    // CD3DX12_DEFAULT tag: no parameters, no static samplers, no flags.
    CD3DX12_ROOT_SIGNATURE_DESC(CD3DX12_DEFAULT)
    {
        Init(0, nullptr, 0, nullptr, D3D12_ROOT_SIGNATURE_FLAG_NONE);
    }

    // Member form: forwards to the static Init with *this as the target.
    inline void Init(
        UINT numParameters,
        _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters,
        UINT numStaticSamplers = 0,
        _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr,
        D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE)
    {
        Init(*this, numParameters, _pParameters, numStaticSamplers, _pStaticSamplers, flags);
    }

    // Static form: writes the five fields of desc.
    static inline void Init(
        _Out_ D3D12_ROOT_SIGNATURE_DESC &desc,
        UINT numParameters,
        _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters,
        UINT numStaticSamplers = 0,
        _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr,
        D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE)
    {
        desc.NumParameters = numParameters;
        desc.pParameters = _pParameters;
        desc.NumStaticSamplers = numStaticSamplers;
        desc.pStaticSamplers = _pStaticSamplers;
        desc.Flags = flags;
    }
};

//------------------------------------------------------------------------------------------------
// Helper derived from D3D12_DESCRIPTOR_RANGE1 (the range variant that carries a Flags field).
struct CD3DX12_DESCRIPTOR_RANGE1 : public D3D12_DESCRIPTOR_RANGE1
{
    CD3DX12_DESCRIPTOR_RANGE1() = default;
    explicit CD3DX12_DESCRIPTOR_RANGE1(const D3D12_DESCRIPTOR_RANGE1 &o) :
        D3D12_DESCRIPTOR_RANGE1(o)
    {}
    CD3DX12_DESCRIPTOR_RANGE1(
        D3D12_DESCRIPTOR_RANGE_TYPE rangeType,
        UINT numDescriptors,
        UINT baseShaderRegister,
        UINT registerSpace = 0,
        D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE,
        UINT offsetInDescriptorsFromTableStart =
        D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND)
    {
        Init(rangeType, numDescriptors, baseShaderRegister, registerSpace, flags, offsetInDescriptorsFromTableStart);
    }

    // Member form: forwards to the static Init with *this as the target.
    inline void Init(
        D3D12_DESCRIPTOR_RANGE_TYPE rangeType,
        UINT numDescriptors,
        UINT baseShaderRegister,
        UINT registerSpace = 0,
        D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE,
        UINT offsetInDescriptorsFromTableStart =
        D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND)
    {
        Init(*this, rangeType, numDescriptors, baseShaderRegister, registerSpace, flags, offsetInDescriptorsFromTableStart);
    }

    // Static form: writes the six fields of range.
    static inline void Init(
        _Out_ D3D12_DESCRIPTOR_RANGE1 &range,
        D3D12_DESCRIPTOR_RANGE_TYPE rangeType,
        UINT numDescriptors,
        UINT baseShaderRegister,
        UINT registerSpace = 0,
        D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE,
        UINT offsetInDescriptorsFromTableStart =
        D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND)
    {
        range.RangeType = rangeType;
        range.NumDescriptors = numDescriptors;
        range.BaseShaderRegister = baseShaderRegister;
        range.RegisterSpace = registerSpace;
        range.Flags = flags;
        range.OffsetInDescriptorsFromTableStart = offsetInDescriptorsFromTableStart;
    }
};

//------------------------------------------------------------------------------------------------
// Helper derived from D3D12_ROOT_DESCRIPTOR_TABLE1: a count plus a pointer to DESCRIPTOR_RANGE1
// entries. The pointer is stored as given.
struct CD3DX12_ROOT_DESCRIPTOR_TABLE1 : public D3D12_ROOT_DESCRIPTOR_TABLE1
{
    CD3DX12_ROOT_DESCRIPTOR_TABLE1() = default;
    explicit CD3DX12_ROOT_DESCRIPTOR_TABLE1(const D3D12_ROOT_DESCRIPTOR_TABLE1 &o) :
        D3D12_ROOT_DESCRIPTOR_TABLE1(o)
    {}
    CD3DX12_ROOT_DESCRIPTOR_TABLE1(
        UINT numDescriptorRanges,
        _In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE1* _pDescriptorRanges)
    {
        Init(numDescriptorRanges, _pDescriptorRanges);
    }

    // Member form: forwards to the static Init with *this as the target.
    inline void Init(
        UINT numDescriptorRanges,
        _In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE1* _pDescriptorRanges)
    {
        Init(*this, numDescriptorRanges, _pDescriptorRanges);
    }

    // Static form: writes the two fields of rootDescriptorTable.
    static inline void Init(
        _Out_ D3D12_ROOT_DESCRIPTOR_TABLE1 &rootDescriptorTable,
        UINT numDescriptorRanges,
        _In_reads_opt_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE1* _pDescriptorRanges)
    {
        rootDescriptorTable.NumDescriptorRanges = numDescriptorRanges;
        rootDescriptorTable.pDescriptorRanges = _pDescriptorRanges;
    }
};

//------------------------------------------------------------------------------------------------
// Helper derived from D3D12_ROOT_DESCRIPTOR1 (register, register space, descriptor flags).
struct CD3DX12_ROOT_DESCRIPTOR1 : public D3D12_ROOT_DESCRIPTOR1
{
    CD3DX12_ROOT_DESCRIPTOR1() = default;
    explicit CD3DX12_ROOT_DESCRIPTOR1(const D3D12_ROOT_DESCRIPTOR1 &o) :
        D3D12_ROOT_DESCRIPTOR1(o)
    {}
    CD3DX12_ROOT_DESCRIPTOR1(
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE)
    {
        Init(shaderRegister, registerSpace, flags);
    }

    // Member form: forwards to the static Init with *this as the target.
    inline void Init(
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE)
    {
        Init(*this, shaderRegister, registerSpace, flags);
    }

    // Static form. The out-parameter is named "table" although it is a root descriptor.
    static inline void Init(
        _Out_ D3D12_ROOT_DESCRIPTOR1 &table,
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE)
    {
        table.ShaderRegister = shaderRegister;
        table.RegisterSpace = registerSpace;
        table.Flags = flags;
    }
};

//------------------------------------------------------------------------------------------------
// Helper derived from D3D12_ROOT_PARAMETER1. Each InitAs* sets ParameterType and ShaderVisibility
// and then fills the union member that matches the type. Note the argument order of the
// descriptor-view overloads: flags comes BEFORE visibility.
struct CD3DX12_ROOT_PARAMETER1 : public D3D12_ROOT_PARAMETER1
{
    CD3DX12_ROOT_PARAMETER1() = default;
    explicit CD3DX12_ROOT_PARAMETER1(const D3D12_ROOT_PARAMETER1 &o) :
        D3D12_ROOT_PARAMETER1(o)
    {}

    // Descriptor table: stores the range count and pointer via CD3DX12_ROOT_DESCRIPTOR_TABLE1::Init.
    static inline void InitAsDescriptorTable(
        _Out_ D3D12_ROOT_PARAMETER1 &rootParam,
        UINT numDescriptorRanges,
        _In_reads_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE1* pDescriptorRanges,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
        rootParam.ShaderVisibility = visibility;
        CD3DX12_ROOT_DESCRIPTOR_TABLE1::Init(rootParam.DescriptorTable, numDescriptorRanges, pDescriptorRanges);
    }

    // 32-bit root constants: filled through CD3DX12_ROOT_CONSTANTS::Init.
    static inline void InitAsConstants(
        _Out_ D3D12_ROOT_PARAMETER1 &rootParam,
        UINT num32BitValues,
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
        rootParam.ShaderVisibility = visibility;
        CD3DX12_ROOT_CONSTANTS::Init(rootParam.Constants, num32BitValues, shaderRegister, registerSpace);
    }

    // Root CBV.
    static inline void InitAsConstantBufferView(
        _Out_ D3D12_ROOT_PARAMETER1 &rootParam,
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
        rootParam.ShaderVisibility = visibility;
        CD3DX12_ROOT_DESCRIPTOR1::Init(rootParam.Descriptor, shaderRegister, registerSpace, flags);
    }

    // Root SRV.
    static inline void InitAsShaderResourceView(
        _Out_ D3D12_ROOT_PARAMETER1 &rootParam,
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
        rootParam.ShaderVisibility = visibility;
        CD3DX12_ROOT_DESCRIPTOR1::Init(rootParam.Descriptor, shaderRegister, registerSpace, flags);
    }

    // Root UAV.
    static inline void InitAsUnorderedAccessView(
        _Out_ D3D12_ROOT_PARAMETER1 &rootParam,
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
        rootParam.ShaderVisibility = visibility;
        CD3DX12_ROOT_DESCRIPTOR1::Init(rootParam.Descriptor, shaderRegister, registerSpace, flags);
    }

    // Member overloads: each forwards to the static overload of the same name with *this.
    inline void InitAsDescriptorTable(
        UINT numDescriptorRanges,
        _In_reads_(numDescriptorRanges) const D3D12_DESCRIPTOR_RANGE1* pDescriptorRanges,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        InitAsDescriptorTable(*this, numDescriptorRanges, pDescriptorRanges, visibility);
    }

    inline void InitAsConstants(
        UINT num32BitValues,
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        InitAsConstants(*this, num32BitValues, shaderRegister, registerSpace, visibility);
    }

    inline void InitAsConstantBufferView(
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        InitAsConstantBufferView(*this, shaderRegister, registerSpace, flags, visibility);
    }

    inline void InitAsShaderResourceView(
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        InitAsShaderResourceView(*this, shaderRegister, registerSpace, flags, visibility);
    }

    inline void InitAsUnorderedAccessView(
        UINT shaderRegister,
        UINT registerSpace = 0,
        D3D12_ROOT_DESCRIPTOR_FLAGS flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE,
        D3D12_SHADER_VISIBILITY visibility = D3D12_SHADER_VISIBILITY_ALL)
    {
        InitAsUnorderedAccessView(*this, shaderRegister, registerSpace, flags, visibility);
    }
};

//------------------------------------------------------------------------------------------------
// Helper derived from D3D12_VERSIONED_ROOT_SIGNATURE_DESC. The Version field selects which of
// Desc_1_0 / Desc_1_1 is meaningful; every constructor and Init_* below sets Version together
// with the matching member.
struct CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC : public D3D12_VERSIONED_ROOT_SIGNATURE_DESC
{
    CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC() = default;
    explicit CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC &o) :
        D3D12_VERSIONED_ROOT_SIGNATURE_DESC(o)
    {}
    // Wraps an existing 1.0 descriptor.
    explicit CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC(const D3D12_ROOT_SIGNATURE_DESC &o)
    {
        Version = D3D_ROOT_SIGNATURE_VERSION_1_0;
        Desc_1_0 = o;
    }
    // Wraps an existing 1.1 descriptor.
    explicit CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC(const D3D12_ROOT_SIGNATURE_DESC1 &o)
    {
        Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
        Desc_1_1 = o;
    }
    // The parameter pointer type picks the version: D3D12_ROOT_PARAMETER* -> 1.0.
    CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC(
        UINT numParameters,
        _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters,
        UINT numStaticSamplers = 0,
        _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr,
        D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE)
    {
        Init_1_0(numParameters, _pParameters, numStaticSamplers, _pStaticSamplers, flags);
    }
    // D3D12_ROOT_PARAMETER1* -> 1.1.
    CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC(
        UINT numParameters,
        _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER1* _pParameters,
        UINT numStaticSamplers = 0,
        _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr,
        D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE)
    {
        Init_1_1(numParameters, _pParameters, numStaticSamplers, _pStaticSamplers, flags);
    }
    // CD3DX12_DEFAULT tag: an empty version 1.1 signature.
    CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC(CD3DX12_DEFAULT)
    {
        Init_1_1(0, nullptr, 0, nullptr, D3D12_ROOT_SIGNATURE_FLAG_NONE);
    }

    // Member form: forwards to the static Init_1_0 with *this as the target.
    inline void Init_1_0(
        UINT numParameters,
        _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters,
        UINT numStaticSamplers = 0,
        _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr,
        D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE)
    {
        Init_1_0(*this, numParameters, _pParameters, numStaticSamplers, _pStaticSamplers, flags);
    }

    // Static form: sets Version to 1.0 and fills Desc_1_0.
    static inline void Init_1_0(
        _Out_ D3D12_VERSIONED_ROOT_SIGNATURE_DESC &desc,
        UINT numParameters,
        _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER* _pParameters,
        UINT numStaticSamplers = 0,
        _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr,
        D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE)
    {
        desc.Version = D3D_ROOT_SIGNATURE_VERSION_1_0;
        desc.Desc_1_0.NumParameters = numParameters;
        desc.Desc_1_0.pParameters = _pParameters;
        desc.Desc_1_0.NumStaticSamplers = numStaticSamplers;
        desc.Desc_1_0.pStaticSamplers = _pStaticSamplers;
        desc.Desc_1_0.Flags = flags;
    }

    // Member form: forwards to the static Init_1_1 with *this as the target.
    inline void Init_1_1(
        UINT numParameters,
        _In_reads_opt_(numParameters) const
        D3D12_ROOT_PARAMETER1* _pParameters,
        UINT numStaticSamplers = 0,
        _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr,
        D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE)
    {
        Init_1_1(*this, numParameters, _pParameters, numStaticSamplers, _pStaticSamplers, flags);
    }

    // Static form: sets Version to 1.1 and fills Desc_1_1.
    static inline void Init_1_1(
        _Out_ D3D12_VERSIONED_ROOT_SIGNATURE_DESC &desc,
        UINT numParameters,
        _In_reads_opt_(numParameters) const D3D12_ROOT_PARAMETER1* _pParameters,
        UINT numStaticSamplers = 0,
        _In_reads_opt_(numStaticSamplers) const D3D12_STATIC_SAMPLER_DESC* _pStaticSamplers = nullptr,
        D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_NONE)
    {
        desc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
        desc.Desc_1_1.NumParameters = numParameters;
        desc.Desc_1_1.pParameters = _pParameters;
        desc.Desc_1_1.NumStaticSamplers = numStaticSamplers;
        desc.Desc_1_1.pStaticSamplers = _pStaticSamplers;
        desc.Desc_1_1.Flags = flags;
    }
};

//------------------------------------------------------------------------------------------------
// Helper derived from D3D12_CPU_DESCRIPTOR_HANDLE that adds offset arithmetic on ptr.
// Offsets are signed (INT); the arithmetic is done in INT64 and the result is converted
// back to SIZE_T.
struct CD3DX12_CPU_DESCRIPTOR_HANDLE : public D3D12_CPU_DESCRIPTOR_HANDLE
{
    CD3DX12_CPU_DESCRIPTOR_HANDLE() = default;
    explicit CD3DX12_CPU_DESCRIPTOR_HANDLE(const D3D12_CPU_DESCRIPTOR_HANDLE &o) :
        D3D12_CPU_DESCRIPTOR_HANDLE(o)
    {}
    // CD3DX12_DEFAULT tag: handle with ptr == 0.
    CD3DX12_CPU_DESCRIPTOR_HANDLE(CD3DX12_DEFAULT) { ptr = 0; }
    // other + offsetScaledByIncrementSize (the offset is added to ptr as-is, without scaling).
    CD3DX12_CPU_DESCRIPTOR_HANDLE(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE &other, INT offsetScaledByIncrementSize)
    {
        InitOffsetted(other, offsetScaledByIncrementSize);
    }
    // other + offsetInDescriptors * descriptorIncrementSize.
    CD3DX12_CPU_DESCRIPTOR_HANDLE(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE &other, INT offsetInDescriptors, UINT descriptorIncrementSize)
    {
        InitOffsetted(other, offsetInDescriptors, descriptorIncrementSize);
    }
    // Advances this handle in place by offsetInDescriptors * descriptorIncrementSize; returns *this.
    CD3DX12_CPU_DESCRIPTOR_HANDLE& Offset(INT offsetInDescriptors, UINT descriptorIncrementSize)
    {
        ptr = SIZE_T(INT64(ptr) + INT64(offsetInDescriptors) * INT64(descriptorIncrementSize));
        return *this;
    }
    // Advances this handle in place by offsetScaledByIncrementSize; returns *this.
    CD3DX12_CPU_DESCRIPTOR_HANDLE& Offset(INT offsetScaledByIncrementSize)
    {
        ptr =
        SIZE_T(INT64(ptr) + INT64(offsetScaledByIncrementSize));
        return *this;
    }
    // Handles compare equal when their ptr values are equal.
    bool operator==(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE& other) const
    {
        return (ptr == other.ptr);
    }
    bool operator!=(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE& other) const
    {
        return (ptr != other.ptr);
    }
    // Assignment from the plain D3D12 handle type copies ptr.
    CD3DX12_CPU_DESCRIPTOR_HANDLE &operator=(const D3D12_CPU_DESCRIPTOR_HANDLE &other)
    {
        ptr = other.ptr;
        return *this;
    }

    // Member forms: forward to the static InitOffsetted overloads with *this as the target.
    inline void InitOffsetted(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE &base, INT offsetScaledByIncrementSize)
    {
        InitOffsetted(*this, base, offsetScaledByIncrementSize);
    }

    inline void InitOffsetted(_In_ const D3D12_CPU_DESCRIPTOR_HANDLE &base, INT offsetInDescriptors, UINT descriptorIncrementSize)
    {
        InitOffsetted(*this, base, offsetInDescriptors, descriptorIncrementSize);
    }

    // Static forms: handle.ptr = base.ptr + offset, where the offset is either given directly
    // or computed as offsetInDescriptors * descriptorIncrementSize.
    static inline void InitOffsetted(_Out_ D3D12_CPU_DESCRIPTOR_HANDLE &handle, _In_ const D3D12_CPU_DESCRIPTOR_HANDLE &base, INT offsetScaledByIncrementSize)
    {
        handle.ptr = SIZE_T(INT64(base.ptr) + INT64(offsetScaledByIncrementSize));
    }

    static inline void InitOffsetted(_Out_ D3D12_CPU_DESCRIPTOR_HANDLE &handle, _In_ const D3D12_CPU_DESCRIPTOR_HANDLE &base, INT offsetInDescriptors, UINT descriptorIncrementSize)
    {
        handle.ptr = SIZE_T(INT64(base.ptr) + INT64(offsetInDescriptors) * INT64(descriptorIncrementSize));
    }
};

//------------------------------------------------------------------------------------------------
// GPU counterpart of CD3DX12_CPU_DESCRIPTOR_HANDLE. It has the same members; the only
// difference in the arithmetic is that results are converted to UINT64 instead of SIZE_T.
struct CD3DX12_GPU_DESCRIPTOR_HANDLE : public D3D12_GPU_DESCRIPTOR_HANDLE
{
    CD3DX12_GPU_DESCRIPTOR_HANDLE() = default;
    explicit CD3DX12_GPU_DESCRIPTOR_HANDLE(const D3D12_GPU_DESCRIPTOR_HANDLE &o) :
        D3D12_GPU_DESCRIPTOR_HANDLE(o)
    {}
    // CD3DX12_DEFAULT tag: handle with ptr == 0.
    CD3DX12_GPU_DESCRIPTOR_HANDLE(CD3DX12_DEFAULT) { ptr = 0; }
    // other + offsetScaledByIncrementSize (the offset is added to ptr as-is, without scaling).
    CD3DX12_GPU_DESCRIPTOR_HANDLE(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE &other, INT offsetScaledByIncrementSize)
    {
        InitOffsetted(other, offsetScaledByIncrementSize);
    }
    // other + offsetInDescriptors * descriptorIncrementSize.
    CD3DX12_GPU_DESCRIPTOR_HANDLE(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE &other, INT offsetInDescriptors, UINT descriptorIncrementSize)
    {
        InitOffsetted(other,
        offsetInDescriptors, descriptorIncrementSize);
    }
    // Advances this handle in place by offsetInDescriptors * descriptorIncrementSize; returns *this.
    CD3DX12_GPU_DESCRIPTOR_HANDLE& Offset(INT offsetInDescriptors, UINT descriptorIncrementSize)
    {
        ptr = UINT64(INT64(ptr) + INT64(offsetInDescriptors) * INT64(descriptorIncrementSize));
        return *this;
    }
    // Advances this handle in place by offsetScaledByIncrementSize; returns *this.
    CD3DX12_GPU_DESCRIPTOR_HANDLE& Offset(INT offsetScaledByIncrementSize)
    {
        ptr = UINT64(INT64(ptr) + INT64(offsetScaledByIncrementSize));
        return *this;
    }
    // Handles compare equal when their ptr values are equal.
    inline bool operator==(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE& other) const
    {
        return (ptr == other.ptr);
    }
    inline bool operator!=(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE& other) const
    {
        return (ptr != other.ptr);
    }
    // Assignment from the plain D3D12 handle type copies ptr.
    CD3DX12_GPU_DESCRIPTOR_HANDLE &operator=(const D3D12_GPU_DESCRIPTOR_HANDLE &other)
    {
        ptr = other.ptr;
        return *this;
    }

    // Member forms: forward to the static InitOffsetted overloads with *this as the target.
    inline void InitOffsetted(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE &base, INT offsetScaledByIncrementSize)
    {
        InitOffsetted(*this, base, offsetScaledByIncrementSize);
    }

    inline void InitOffsetted(_In_ const D3D12_GPU_DESCRIPTOR_HANDLE &base, INT offsetInDescriptors, UINT descriptorIncrementSize)
    {
        InitOffsetted(*this, base, offsetInDescriptors, descriptorIncrementSize);
    }

    // Static forms: handle.ptr = base.ptr + offset, where the offset is either given directly
    // or computed as offsetInDescriptors * descriptorIncrementSize.
    static inline void InitOffsetted(_Out_ D3D12_GPU_DESCRIPTOR_HANDLE &handle, _In_ const D3D12_GPU_DESCRIPTOR_HANDLE &base, INT offsetScaledByIncrementSize)
    {
        handle.ptr = UINT64(INT64(base.ptr) + INT64(offsetScaledByIncrementSize));
    }

    static inline void InitOffsetted(_Out_ D3D12_GPU_DESCRIPTOR_HANDLE &handle, _In_ const D3D12_GPU_DESCRIPTOR_HANDLE &base, INT offsetInDescriptors, UINT descriptorIncrementSize)
    {
        handle.ptr = UINT64(INT64(base.ptr) + INT64(offsetInDescriptors) * INT64(descriptorIncrementSize));
    }
};

//------------------------------------------------------------------------------------------------
// Flattens (mip, array slice, plane) into one subresource index. Mips vary fastest, then array
// slices, then planes: index = MipSlice + ArraySlice * MipLevels + PlaneSlice * MipLevels * ArraySize.
inline UINT D3D12CalcSubresource( UINT MipSlice, UINT ArraySlice, UINT PlaneSlice, UINT MipLevels, UINT ArraySize )
{
    return MipSlice + ArraySlice * MipLevels + PlaneSlice * MipLevels * ArraySize;
}
//------------------------------------------------------------------------------------------------ template inline void D3D12DecomposeSubresource( UINT Subresource, UINT MipLevels, UINT ArraySize, _Out_ T& MipSlice, _Out_ U& ArraySlice, _Out_ V& PlaneSlice ) { MipSlice = static_cast(Subresource % MipLevels); ArraySlice = static_cast((Subresource / MipLevels) % ArraySize); PlaneSlice = static_cast(Subresource / (MipLevels * ArraySize)); } //------------------------------------------------------------------------------------------------ inline UINT8 D3D12GetFormatPlaneCount( _In_ ID3D12Device* pDevice, DXGI_FORMAT Format ) { D3D12_FEATURE_DATA_FORMAT_INFO formatInfo = { Format, 0 }; if (FAILED(pDevice->CheckFeatureSupport(D3D12_FEATURE_FORMAT_INFO, &formatInfo, sizeof(formatInfo)))) { return 0; } return formatInfo.PlaneCount; } //------------------------------------------------------------------------------------------------ struct CD3DX12_RESOURCE_DESC : public D3D12_RESOURCE_DESC { CD3DX12_RESOURCE_DESC() = default; explicit CD3DX12_RESOURCE_DESC( const D3D12_RESOURCE_DESC& o ) : D3D12_RESOURCE_DESC( o ) {} CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION dimension, UINT64 alignment, UINT64 width, UINT height, UINT16 depthOrArraySize, UINT16 mipLevels, DXGI_FORMAT format, UINT sampleCount, UINT sampleQuality, D3D12_TEXTURE_LAYOUT layout, D3D12_RESOURCE_FLAGS flags ) { Dimension = dimension; Alignment = alignment; Width = width; Height = height; DepthOrArraySize = depthOrArraySize; MipLevels = mipLevels; Format = format; SampleDesc.Count = sampleCount; SampleDesc.Quality = sampleQuality; Layout = layout; Flags = flags; } static inline CD3DX12_RESOURCE_DESC Buffer( const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE ) { return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_BUFFER, resAllocInfo.Alignment, resAllocInfo.SizeInBytes, 1, 1, 1, DXGI_FORMAT_UNKNOWN, 1, 0, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, flags ); } static 
inline CD3DX12_RESOURCE_DESC Buffer( UINT64 width, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, UINT64 alignment = 0 ) { return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_BUFFER, alignment, width, 1, 1, 1, DXGI_FORMAT_UNKNOWN, 1, 0, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, flags ); } static inline CD3DX12_RESOURCE_DESC Tex1D( DXGI_FORMAT format, UINT64 width, UINT16 arraySize = 1, UINT16 mipLevels = 0, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, UINT64 alignment = 0 ) { return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE1D, alignment, width, 1, arraySize, mipLevels, format, 1, 0, layout, flags ); } static inline CD3DX12_RESOURCE_DESC Tex2D( DXGI_FORMAT format, UINT64 width, UINT height, UINT16 arraySize = 1, UINT16 mipLevels = 0, UINT sampleCount = 1, UINT sampleQuality = 0, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, UINT64 alignment = 0 ) { return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE2D, alignment, width, height, arraySize, mipLevels, format, sampleCount, sampleQuality, layout, flags ); } static inline CD3DX12_RESOURCE_DESC Tex3D( DXGI_FORMAT format, UINT64 width, UINT height, UINT16 depth, UINT16 mipLevels = 0, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, UINT64 alignment = 0 ) { return CD3DX12_RESOURCE_DESC( D3D12_RESOURCE_DIMENSION_TEXTURE3D, alignment, width, height, depth, mipLevels, format, 1, 0, layout, flags ); } inline UINT16 Depth() const { return (Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? DepthOrArraySize : 1); } inline UINT16 ArraySize() const { return (Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D ? 
DepthOrArraySize : 1); } inline UINT8 PlaneCount(_In_ ID3D12Device* pDevice) const { return D3D12GetFormatPlaneCount(pDevice, Format); } inline UINT Subresources(_In_ ID3D12Device* pDevice) const { return MipLevels * ArraySize() * PlaneCount(pDevice); } inline UINT CalcSubresource(UINT MipSlice, UINT ArraySlice, UINT PlaneSlice) { return D3D12CalcSubresource(MipSlice, ArraySlice, PlaneSlice, MipLevels, ArraySize()); } }; inline bool operator==( const D3D12_RESOURCE_DESC& l, const D3D12_RESOURCE_DESC& r ) { return l.Dimension == r.Dimension && l.Alignment == r.Alignment && l.Width == r.Width && l.Height == r.Height && l.DepthOrArraySize == r.DepthOrArraySize && l.MipLevels == r.MipLevels && l.Format == r.Format && l.SampleDesc.Count == r.SampleDesc.Count && l.SampleDesc.Quality == r.SampleDesc.Quality && l.Layout == r.Layout && l.Flags == r.Flags; } inline bool operator!=( const D3D12_RESOURCE_DESC& l, const D3D12_RESOURCE_DESC& r ) { return !( l == r ); } //------------------------------------------------------------------------------------------------ struct CD3DX12_RESOURCE_DESC1 : public D3D12_RESOURCE_DESC1 { CD3DX12_RESOURCE_DESC1() = default; explicit CD3DX12_RESOURCE_DESC1( const D3D12_RESOURCE_DESC1& o ) : D3D12_RESOURCE_DESC1( o ) {} CD3DX12_RESOURCE_DESC1( D3D12_RESOURCE_DIMENSION dimension, UINT64 alignment, UINT64 width, UINT height, UINT16 depthOrArraySize, UINT16 mipLevels, DXGI_FORMAT format, UINT sampleCount, UINT sampleQuality, D3D12_TEXTURE_LAYOUT layout, D3D12_RESOURCE_FLAGS flags, UINT samplerFeedbackMipRegionWidth = 0, UINT samplerFeedbackMipRegionHeight = 0, UINT samplerFeedbackMipRegionDepth = 0) { Dimension = dimension; Alignment = alignment; Width = width; Height = height; DepthOrArraySize = depthOrArraySize; MipLevels = mipLevels; Format = format; SampleDesc.Count = sampleCount; SampleDesc.Quality = sampleQuality; Layout = layout; Flags = flags; SamplerFeedbackMipRegion.Width = samplerFeedbackMipRegionWidth; SamplerFeedbackMipRegion.Height 
= samplerFeedbackMipRegionHeight; SamplerFeedbackMipRegion.Depth = samplerFeedbackMipRegionDepth; } static inline CD3DX12_RESOURCE_DESC1 Buffer( const D3D12_RESOURCE_ALLOCATION_INFO& resAllocInfo, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE ) { return CD3DX12_RESOURCE_DESC1( D3D12_RESOURCE_DIMENSION_BUFFER, resAllocInfo.Alignment, resAllocInfo.SizeInBytes, 1, 1, 1, DXGI_FORMAT_UNKNOWN, 1, 0, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, flags, 0, 0, 0 ); } static inline CD3DX12_RESOURCE_DESC1 Buffer( UINT64 width, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, UINT64 alignment = 0 ) { return CD3DX12_RESOURCE_DESC1( D3D12_RESOURCE_DIMENSION_BUFFER, alignment, width, 1, 1, 1, DXGI_FORMAT_UNKNOWN, 1, 0, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, flags, 0, 0, 0 ); } static inline CD3DX12_RESOURCE_DESC1 Tex1D( DXGI_FORMAT format, UINT64 width, UINT16 arraySize = 1, UINT16 mipLevels = 0, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, UINT64 alignment = 0 ) { return CD3DX12_RESOURCE_DESC1( D3D12_RESOURCE_DIMENSION_TEXTURE1D, alignment, width, 1, arraySize, mipLevels, format, 1, 0, layout, flags, 0, 0, 0 ); } static inline CD3DX12_RESOURCE_DESC1 Tex2D( DXGI_FORMAT format, UINT64 width, UINT height, UINT16 arraySize = 1, UINT16 mipLevels = 0, UINT sampleCount = 1, UINT sampleQuality = 0, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, UINT64 alignment = 0, UINT samplerFeedbackMipRegionWidth = 0, UINT samplerFeedbackMipRegionHeight = 0, UINT samplerFeedbackMipRegionDepth = 0) { return CD3DX12_RESOURCE_DESC1( D3D12_RESOURCE_DIMENSION_TEXTURE2D, alignment, width, height, arraySize, mipLevels, format, sampleCount, sampleQuality, layout, flags, samplerFeedbackMipRegionWidth, samplerFeedbackMipRegionHeight, samplerFeedbackMipRegionDepth ); } static inline CD3DX12_RESOURCE_DESC1 Tex3D( DXGI_FORMAT format, UINT64 width, UINT height, UINT16 depth, UINT16 
mipLevels = 0, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE, D3D12_TEXTURE_LAYOUT layout = D3D12_TEXTURE_LAYOUT_UNKNOWN, UINT64 alignment = 0 ) { return CD3DX12_RESOURCE_DESC1( D3D12_RESOURCE_DIMENSION_TEXTURE3D, alignment, width, height, depth, mipLevels, format, 1, 0, layout, flags, 0, 0, 0 ); } inline UINT16 Depth() const { return (Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? DepthOrArraySize : 1); } inline UINT16 ArraySize() const { return (Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D ? DepthOrArraySize : 1); } inline UINT8 PlaneCount(_In_ ID3D12Device* pDevice) const { return D3D12GetFormatPlaneCount(pDevice, Format); } inline UINT Subresources(_In_ ID3D12Device* pDevice) const { return MipLevels * ArraySize() * PlaneCount(pDevice); } inline UINT CalcSubresource(UINT MipSlice, UINT ArraySlice, UINT PlaneSlice) { return D3D12CalcSubresource(MipSlice, ArraySlice, PlaneSlice, MipLevels, ArraySize()); } }; inline bool operator==( const D3D12_RESOURCE_DESC1& l, const D3D12_RESOURCE_DESC1& r ) { return l.Dimension == r.Dimension && l.Alignment == r.Alignment && l.Width == r.Width && l.Height == r.Height && l.DepthOrArraySize == r.DepthOrArraySize && l.MipLevels == r.MipLevels && l.Format == r.Format && l.SampleDesc.Count == r.SampleDesc.Count && l.SampleDesc.Quality == r.SampleDesc.Quality && l.Layout == r.Layout && l.Flags == r.Flags && l.SamplerFeedbackMipRegion.Width == r.SamplerFeedbackMipRegion.Width && l.SamplerFeedbackMipRegion.Height == r.SamplerFeedbackMipRegion.Height && l.SamplerFeedbackMipRegion.Depth == r.SamplerFeedbackMipRegion.Depth; } inline bool operator!=( const D3D12_RESOURCE_DESC1& l, const D3D12_RESOURCE_DESC1& r ) { return !( l == r ); } //------------------------------------------------------------------------------------------------ struct CD3DX12_VIEW_INSTANCING_DESC : public D3D12_VIEW_INSTANCING_DESC { CD3DX12_VIEW_INSTANCING_DESC() = default; explicit CD3DX12_VIEW_INSTANCING_DESC( const D3D12_VIEW_INSTANCING_DESC& o ) : 
D3D12_VIEW_INSTANCING_DESC( o ) {} explicit CD3DX12_VIEW_INSTANCING_DESC( CD3DX12_DEFAULT ) { ViewInstanceCount = 0; pViewInstanceLocations = nullptr; Flags = D3D12_VIEW_INSTANCING_FLAG_NONE; } explicit CD3DX12_VIEW_INSTANCING_DESC( UINT InViewInstanceCount, const D3D12_VIEW_INSTANCE_LOCATION* InViewInstanceLocations, D3D12_VIEW_INSTANCING_FLAGS InFlags) { ViewInstanceCount = InViewInstanceCount; pViewInstanceLocations = InViewInstanceLocations; Flags = InFlags; } }; //------------------------------------------------------------------------------------------------ // Row-by-row memcpy inline void MemcpySubresource( _In_ const D3D12_MEMCPY_DEST* pDest, _In_ const D3D12_SUBRESOURCE_DATA* pSrc, SIZE_T RowSizeInBytes, UINT NumRows, UINT NumSlices) { for (UINT z = 0; z < NumSlices; ++z) { auto pDestSlice = reinterpret_cast(pDest->pData) + pDest->SlicePitch * z; auto pSrcSlice = reinterpret_cast(pSrc->pData) + pSrc->SlicePitch * LONG_PTR(z); for (UINT y = 0; y < NumRows; ++y) { memcpy(pDestSlice + pDest->RowPitch * y, pSrcSlice + pSrc->RowPitch * LONG_PTR(y), RowSizeInBytes); } } } //------------------------------------------------------------------------------------------------ // Returns required size of a buffer to be used for data upload inline UINT64 GetRequiredIntermediateSize( _In_ ID3D12Resource* pDestinationResource, _In_range_(0,D3D12_REQ_SUBRESOURCES) UINT FirstSubresource, _In_range_(0,D3D12_REQ_SUBRESOURCES-FirstSubresource) UINT NumSubresources) { auto Desc = pDestinationResource->GetDesc(); UINT64 RequiredSize = 0; ID3D12Device* pDevice = nullptr; pDestinationResource->GetDevice(IID_ID3D12Device, reinterpret_cast(&pDevice)); pDevice->GetCopyableFootprints(&Desc, FirstSubresource, NumSubresources, 0, nullptr, nullptr, nullptr, &RequiredSize); pDevice->Release(); return RequiredSize; } //------------------------------------------------------------------------------------------------ // All arrays must be populated (e.g. 
by calling GetCopyableFootprints) inline UINT64 UpdateSubresources( _In_ ID3D12GraphicsCommandList* pCmdList, _In_ ID3D12Resource* pDestinationResource, _In_ ID3D12Resource* pIntermediate, _In_range_(0,D3D12_REQ_SUBRESOURCES) UINT FirstSubresource, _In_range_(0,D3D12_REQ_SUBRESOURCES-FirstSubresource) UINT NumSubresources, UINT64 RequiredSize, _In_reads_(NumSubresources) const D3D12_PLACED_SUBRESOURCE_FOOTPRINT* pLayouts, _In_reads_(NumSubresources) const UINT* pNumRows, _In_reads_(NumSubresources) const UINT64* pRowSizesInBytes, _In_reads_(NumSubresources) const D3D12_SUBRESOURCE_DATA* pSrcData) { // Minor validation auto IntermediateDesc = pIntermediate->GetDesc(); auto DestinationDesc = pDestinationResource->GetDesc(); if (IntermediateDesc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER || IntermediateDesc.Width < RequiredSize + pLayouts[0].Offset || RequiredSize > SIZE_T(-1) || (DestinationDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && (FirstSubresource != 0 || NumSubresources != 1))) { return 0; } BYTE* pData; HRESULT hr = pIntermediate->Map(0, nullptr, reinterpret_cast(&pData)); if (FAILED(hr)) { return 0; } for (UINT i = 0; i < NumSubresources; ++i) { if (pRowSizesInBytes[i] > SIZE_T(-1)) return 0; D3D12_MEMCPY_DEST DestData = { pData + pLayouts[i].Offset, pLayouts[i].Footprint.RowPitch, SIZE_T(pLayouts[i].Footprint.RowPitch) * SIZE_T(pNumRows[i]) }; MemcpySubresource(&DestData, &pSrcData[i], static_cast(pRowSizesInBytes[i]), pNumRows[i], pLayouts[i].Footprint.Depth); } pIntermediate->Unmap(0, nullptr); if (DestinationDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { pCmdList->CopyBufferRegion( pDestinationResource, 0, pIntermediate, pLayouts[0].Offset, pLayouts[0].Footprint.Width); } else { for (UINT i = 0; i < NumSubresources; ++i) { CD3DX12_TEXTURE_COPY_LOCATION Dst(pDestinationResource, i + FirstSubresource); CD3DX12_TEXTURE_COPY_LOCATION Src(pIntermediate, pLayouts[i]); pCmdList->CopyTextureRegion(&Dst, 0, 0, 0, &Src, nullptr); } } return 
RequiredSize; } //------------------------------------------------------------------------------------------------ // Heap-allocating UpdateSubresources implementation inline UINT64 UpdateSubresources( _In_ ID3D12GraphicsCommandList* pCmdList, _In_ ID3D12Resource* pDestinationResource, _In_ ID3D12Resource* pIntermediate, UINT64 IntermediateOffset, _In_range_(0,D3D12_REQ_SUBRESOURCES) UINT FirstSubresource, _In_range_(0,D3D12_REQ_SUBRESOURCES-FirstSubresource) UINT NumSubresources, _In_reads_(NumSubresources) D3D12_SUBRESOURCE_DATA* pSrcData) { UINT64 RequiredSize = 0; UINT64 MemToAlloc = static_cast(sizeof(D3D12_PLACED_SUBRESOURCE_FOOTPRINT) + sizeof(UINT) + sizeof(UINT64)) * NumSubresources; if (MemToAlloc > SIZE_MAX) { return 0; } void* pMem = HeapAlloc(GetProcessHeap(), 0, static_cast(MemToAlloc)); if (pMem == nullptr) { return 0; } auto pLayouts = reinterpret_cast(pMem); UINT64* pRowSizesInBytes = reinterpret_cast(pLayouts + NumSubresources); UINT* pNumRows = reinterpret_cast(pRowSizesInBytes + NumSubresources); auto Desc = pDestinationResource->GetDesc(); ID3D12Device* pDevice = nullptr; pDestinationResource->GetDevice(IID_ID3D12Device, reinterpret_cast(&pDevice)); pDevice->GetCopyableFootprints(&Desc, FirstSubresource, NumSubresources, IntermediateOffset, pLayouts, pNumRows, pRowSizesInBytes, &RequiredSize); pDevice->Release(); UINT64 Result = UpdateSubresources(pCmdList, pDestinationResource, pIntermediate, FirstSubresource, NumSubresources, RequiredSize, pLayouts, pNumRows, pRowSizesInBytes, pSrcData); HeapFree(GetProcessHeap(), 0, pMem); return Result; } //------------------------------------------------------------------------------------------------ // Stack-allocating UpdateSubresources implementation template inline UINT64 UpdateSubresources( _In_ ID3D12GraphicsCommandList* pCmdList, _In_ ID3D12Resource* pDestinationResource, _In_ ID3D12Resource* pIntermediate, UINT64 IntermediateOffset, _In_range_(0, MaxSubresources) UINT FirstSubresource, 
_In_range_(1, MaxSubresources - FirstSubresource) UINT NumSubresources, _In_reads_(NumSubresources) D3D12_SUBRESOURCE_DATA* pSrcData) { UINT64 RequiredSize = 0; D3D12_PLACED_SUBRESOURCE_FOOTPRINT Layouts[MaxSubresources]; UINT NumRows[MaxSubresources]; UINT64 RowSizesInBytes[MaxSubresources]; auto Desc = pDestinationResource->GetDesc(); ID3D12Device* pDevice = nullptr; pDestinationResource->GetDevice(IID_ID3D12Device, reinterpret_cast(&pDevice)); pDevice->GetCopyableFootprints(&Desc, FirstSubresource, NumSubresources, IntermediateOffset, Layouts, NumRows, RowSizesInBytes, &RequiredSize); pDevice->Release(); return UpdateSubresources(pCmdList, pDestinationResource, pIntermediate, FirstSubresource, NumSubresources, RequiredSize, Layouts, NumRows, RowSizesInBytes, pSrcData); } //------------------------------------------------------------------------------------------------ inline bool D3D12IsLayoutOpaque( D3D12_TEXTURE_LAYOUT Layout ) { return Layout == D3D12_TEXTURE_LAYOUT_UNKNOWN || Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; } //------------------------------------------------------------------------------------------------ template inline ID3D12CommandList * const * CommandListCast(t_CommandListType * const * pp) { // This cast is useful for passing strongly typed command list pointers into // ExecuteCommandLists. // This cast is valid as long as the const-ness is respected. D3D12 APIs do // respect the const-ness of their arguments. return reinterpret_cast(pp); } //------------------------------------------------------------------------------------------------ // D3D12 exports a new method for serializing root signatures in the Windows 10 Anniversary Update. // To help enable root signature 1.1 features when they are available and not require maintaining // two code paths for building root signatures, this helper method reconstructs a 1.0 signature when // 1.1 is not supported. 
inline HRESULT D3DX12SerializeVersionedRootSignature( _In_ const D3D12_VERSIONED_ROOT_SIGNATURE_DESC* pRootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION MaxVersion, _Outptr_ ID3DBlob** ppBlob, _Always_(_Outptr_opt_result_maybenull_) ID3DBlob** ppErrorBlob) { if (ppErrorBlob != nullptr) { *ppErrorBlob = nullptr; } switch (MaxVersion) { case D3D_ROOT_SIGNATURE_VERSION_1_0: switch (pRootSignatureDesc->Version) { case D3D_ROOT_SIGNATURE_VERSION_1_0: return D3D12SerializeRootSignature(&pRootSignatureDesc->Desc_1_0, D3D_ROOT_SIGNATURE_VERSION_1, ppBlob, ppErrorBlob); case D3D_ROOT_SIGNATURE_VERSION_1_1: { HRESULT hr = S_OK; const D3D12_ROOT_SIGNATURE_DESC1& desc_1_1 = pRootSignatureDesc->Desc_1_1; const SIZE_T ParametersSize = sizeof(D3D12_ROOT_PARAMETER) * desc_1_1.NumParameters; void* pParameters = (ParametersSize > 0) ? HeapAlloc(GetProcessHeap(), 0, ParametersSize) : nullptr; if (ParametersSize > 0 && pParameters == nullptr) { hr = E_OUTOFMEMORY; } auto pParameters_1_0 = reinterpret_cast(pParameters); if (SUCCEEDED(hr)) { for (UINT n = 0; n < desc_1_1.NumParameters; n++) { __analysis_assume(ParametersSize == sizeof(D3D12_ROOT_PARAMETER) * desc_1_1.NumParameters); pParameters_1_0[n].ParameterType = desc_1_1.pParameters[n].ParameterType; pParameters_1_0[n].ShaderVisibility = desc_1_1.pParameters[n].ShaderVisibility; switch (desc_1_1.pParameters[n].ParameterType) { case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: pParameters_1_0[n].Constants.Num32BitValues = desc_1_1.pParameters[n].Constants.Num32BitValues; pParameters_1_0[n].Constants.RegisterSpace = desc_1_1.pParameters[n].Constants.RegisterSpace; pParameters_1_0[n].Constants.ShaderRegister = desc_1_1.pParameters[n].Constants.ShaderRegister; break; case D3D12_ROOT_PARAMETER_TYPE_CBV: case D3D12_ROOT_PARAMETER_TYPE_SRV: case D3D12_ROOT_PARAMETER_TYPE_UAV: pParameters_1_0[n].Descriptor.RegisterSpace = desc_1_1.pParameters[n].Descriptor.RegisterSpace; pParameters_1_0[n].Descriptor.ShaderRegister = 
desc_1_1.pParameters[n].Descriptor.ShaderRegister; break; case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: const D3D12_ROOT_DESCRIPTOR_TABLE1& table_1_1 = desc_1_1.pParameters[n].DescriptorTable; const SIZE_T DescriptorRangesSize = sizeof(D3D12_DESCRIPTOR_RANGE) * table_1_1.NumDescriptorRanges; void* pDescriptorRanges = (DescriptorRangesSize > 0 && SUCCEEDED(hr)) ? HeapAlloc(GetProcessHeap(), 0, DescriptorRangesSize) : nullptr; if (DescriptorRangesSize > 0 && pDescriptorRanges == nullptr) { hr = E_OUTOFMEMORY; } auto pDescriptorRanges_1_0 = reinterpret_cast(pDescriptorRanges); if (SUCCEEDED(hr)) { for (UINT x = 0; x < table_1_1.NumDescriptorRanges; x++) { __analysis_assume(DescriptorRangesSize == sizeof(D3D12_DESCRIPTOR_RANGE) * table_1_1.NumDescriptorRanges); pDescriptorRanges_1_0[x].BaseShaderRegister = table_1_1.pDescriptorRanges[x].BaseShaderRegister; pDescriptorRanges_1_0[x].NumDescriptors = table_1_1.pDescriptorRanges[x].NumDescriptors; pDescriptorRanges_1_0[x].OffsetInDescriptorsFromTableStart = table_1_1.pDescriptorRanges[x].OffsetInDescriptorsFromTableStart; pDescriptorRanges_1_0[x].RangeType = table_1_1.pDescriptorRanges[x].RangeType; pDescriptorRanges_1_0[x].RegisterSpace = table_1_1.pDescriptorRanges[x].RegisterSpace; } } D3D12_ROOT_DESCRIPTOR_TABLE& table_1_0 = pParameters_1_0[n].DescriptorTable; table_1_0.NumDescriptorRanges = table_1_1.NumDescriptorRanges; table_1_0.pDescriptorRanges = pDescriptorRanges_1_0; } } } if (SUCCEEDED(hr)) { CD3DX12_ROOT_SIGNATURE_DESC desc_1_0(desc_1_1.NumParameters, pParameters_1_0, desc_1_1.NumStaticSamplers, desc_1_1.pStaticSamplers, desc_1_1.Flags); hr = D3D12SerializeRootSignature(&desc_1_0, D3D_ROOT_SIGNATURE_VERSION_1, ppBlob, ppErrorBlob); } if (pParameters) { for (UINT n = 0; n < desc_1_1.NumParameters; n++) { if (desc_1_1.pParameters[n].ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) { HeapFree(GetProcessHeap(), 0, reinterpret_cast(const_cast(pParameters_1_0[n].DescriptorTable.pDescriptorRanges))); } 
} HeapFree(GetProcessHeap(), 0, pParameters); } return hr; } } break; case D3D_ROOT_SIGNATURE_VERSION_1_1: return D3D12SerializeVersionedRootSignature(pRootSignatureDesc, ppBlob, ppErrorBlob); } return E_INVALIDARG; } //------------------------------------------------------------------------------------------------ struct CD3DX12_RT_FORMAT_ARRAY : public D3D12_RT_FORMAT_ARRAY { CD3DX12_RT_FORMAT_ARRAY() = default; explicit CD3DX12_RT_FORMAT_ARRAY(const D3D12_RT_FORMAT_ARRAY& o) : D3D12_RT_FORMAT_ARRAY(o) {} explicit CD3DX12_RT_FORMAT_ARRAY(_In_reads_(NumFormats) const DXGI_FORMAT* pFormats, UINT NumFormats) { NumRenderTargets = NumFormats; memcpy(RTFormats, pFormats, sizeof(RTFormats)); // assumes ARRAY_SIZE(pFormats) == ARRAY_SIZE(RTFormats) } }; //------------------------------------------------------------------------------------------------ // Pipeline State Stream Helpers //------------------------------------------------------------------------------------------------ //------------------------------------------------------------------------------------------------ // Stream Subobjects, i.e. 
elements of a stream struct DefaultSampleMask { operator UINT() { return UINT_MAX; } }; struct DefaultSampleDesc { operator DXGI_SAMPLE_DESC() { return DXGI_SAMPLE_DESC{1, 0}; } }; #pragma warning(push) #pragma warning(disable : 4324) template class alignas(void*) CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT { private: D3D12_PIPELINE_STATE_SUBOBJECT_TYPE _Type; InnerStructType _Inner; public: CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT() noexcept : _Type(Type), _Inner(DefaultArg()) {} CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT(InnerStructType const& i) : _Type(Type), _Inner(i) {} CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT& operator=(InnerStructType const& i) { _Inner = i; return *this; } operator InnerStructType const&() const { return _Inner; } operator InnerStructType&() { return _Inner; } InnerStructType* operator&() { return &_Inner; } InnerStructType const* operator&() const { return &_Inner; } }; #pragma warning(pop) typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_PIPELINE_STATE_FLAGS, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_FLAGS> CD3DX12_PIPELINE_STATE_STREAM_FLAGS; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< UINT, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_NODE_MASK> CD3DX12_PIPELINE_STATE_STREAM_NODE_MASK; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< ID3D12RootSignature*, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ROOT_SIGNATURE> CD3DX12_PIPELINE_STATE_STREAM_ROOT_SIGNATURE; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_INPUT_LAYOUT_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_INPUT_LAYOUT> CD3DX12_PIPELINE_STATE_STREAM_INPUT_LAYOUT; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_INDEX_BUFFER_STRIP_CUT_VALUE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_IB_STRIP_CUT_VALUE> CD3DX12_PIPELINE_STATE_STREAM_IB_STRIP_CUT_VALUE; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_PRIMITIVE_TOPOLOGY_TYPE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PRIMITIVE_TOPOLOGY> CD3DX12_PIPELINE_STATE_STREAM_PRIMITIVE_TOPOLOGY; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< 
D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VS> CD3DX12_PIPELINE_STATE_STREAM_VS; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_GS> CD3DX12_PIPELINE_STATE_STREAM_GS; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_STREAM_OUTPUT_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_STREAM_OUTPUT> CD3DX12_PIPELINE_STATE_STREAM_STREAM_OUTPUT; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_HS> CD3DX12_PIPELINE_STATE_STREAM_HS; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DS> CD3DX12_PIPELINE_STATE_STREAM_DS; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PS> CD3DX12_PIPELINE_STATE_STREAM_PS; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_AS> CD3DX12_PIPELINE_STATE_STREAM_AS; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_MS> CD3DX12_PIPELINE_STATE_STREAM_MS; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_SHADER_BYTECODE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_CS> CD3DX12_PIPELINE_STATE_STREAM_CS; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< CD3DX12_BLEND_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_BLEND, CD3DX12_DEFAULT> CD3DX12_PIPELINE_STATE_STREAM_BLEND_DESC; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< CD3DX12_DEPTH_STENCIL_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL, CD3DX12_DEFAULT> CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< CD3DX12_DEPTH_STENCIL_DESC1, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL1, CD3DX12_DEFAULT> CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL1; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< DXGI_FORMAT, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL_FORMAT> 
CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL_FORMAT; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< CD3DX12_RASTERIZER_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER, CD3DX12_DEFAULT> CD3DX12_PIPELINE_STATE_STREAM_RASTERIZER; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_RT_FORMAT_ARRAY, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RENDER_TARGET_FORMATS> CD3DX12_PIPELINE_STATE_STREAM_RENDER_TARGET_FORMATS; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< DXGI_SAMPLE_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_DESC, DefaultSampleDesc> CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_DESC; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< UINT, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_MASK, DefaultSampleMask> CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_MASK; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< D3D12_CACHED_PIPELINE_STATE, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_CACHED_PSO> CD3DX12_PIPELINE_STATE_STREAM_CACHED_PSO; typedef CD3DX12_PIPELINE_STATE_STREAM_SUBOBJECT< CD3DX12_VIEW_INSTANCING_DESC, D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VIEW_INSTANCING, CD3DX12_DEFAULT> CD3DX12_PIPELINE_STATE_STREAM_VIEW_INSTANCING; //------------------------------------------------------------------------------------------------ // Stream Parser Helpers struct ID3DX12PipelineParserCallbacks { // Subobject Callbacks virtual void FlagsCb(D3D12_PIPELINE_STATE_FLAGS) {} virtual void NodeMaskCb(UINT) {} virtual void RootSignatureCb(ID3D12RootSignature*) {} virtual void InputLayoutCb(const D3D12_INPUT_LAYOUT_DESC&) {} virtual void IBStripCutValueCb(D3D12_INDEX_BUFFER_STRIP_CUT_VALUE) {} virtual void PrimitiveTopologyTypeCb(D3D12_PRIMITIVE_TOPOLOGY_TYPE) {} virtual void VSCb(const D3D12_SHADER_BYTECODE&) {} virtual void GSCb(const D3D12_SHADER_BYTECODE&) {} virtual void StreamOutputCb(const D3D12_STREAM_OUTPUT_DESC&) {} virtual void HSCb(const D3D12_SHADER_BYTECODE&) {} virtual void DSCb(const D3D12_SHADER_BYTECODE&) {} virtual void PSCb(const D3D12_SHADER_BYTECODE&) {} virtual void CSCb(const 
D3D12_SHADER_BYTECODE&) {} virtual void ASCb(const D3D12_SHADER_BYTECODE&) {} virtual void MSCb(const D3D12_SHADER_BYTECODE&) {} virtual void BlendStateCb(const D3D12_BLEND_DESC&) {} virtual void DepthStencilStateCb(const D3D12_DEPTH_STENCIL_DESC&) {} virtual void DepthStencilState1Cb(const D3D12_DEPTH_STENCIL_DESC1&) {} virtual void DSVFormatCb(DXGI_FORMAT) {} virtual void RasterizerStateCb(const D3D12_RASTERIZER_DESC&) {} virtual void RTVFormatsCb(const D3D12_RT_FORMAT_ARRAY&) {} virtual void SampleDescCb(const DXGI_SAMPLE_DESC&) {} virtual void SampleMaskCb(UINT) {} virtual void ViewInstancingCb(const D3D12_VIEW_INSTANCING_DESC&) {} virtual void CachedPSOCb(const D3D12_CACHED_PIPELINE_STATE&) {} // Error Callbacks virtual void ErrorBadInputParameter(UINT /*ParameterIndex*/) {} virtual void ErrorDuplicateSubobject(D3D12_PIPELINE_STATE_SUBOBJECT_TYPE /*DuplicateType*/) {} virtual void ErrorUnknownSubobject(UINT /*UnknownTypeValue*/) {} virtual ~ID3DX12PipelineParserCallbacks() = default; }; struct D3DX12_MESH_SHADER_PIPELINE_STATE_DESC { ID3D12RootSignature* pRootSignature; D3D12_SHADER_BYTECODE AS; D3D12_SHADER_BYTECODE MS; D3D12_SHADER_BYTECODE PS; D3D12_BLEND_DESC BlendState; UINT SampleMask; D3D12_RASTERIZER_DESC RasterizerState; D3D12_DEPTH_STENCIL_DESC DepthStencilState; D3D12_INDEX_BUFFER_STRIP_CUT_VALUE IBStripCutValue; D3D12_PRIMITIVE_TOPOLOGY_TYPE PrimitiveTopologyType; UINT NumRenderTargets; DXGI_FORMAT RTVFormats[ D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT ]; DXGI_FORMAT DSVFormat; DXGI_SAMPLE_DESC SampleDesc; UINT NodeMask; D3D12_CACHED_PIPELINE_STATE CachedPSO; D3D12_PIPELINE_STATE_FLAGS Flags; }; // CD3DX12_PIPELINE_STATE_STREAM1 Works on RS3+ (where there is a new view instancing subobject). // Use CD3DX12_PIPELINE_STATE_STREAM for RS2+ support. 
struct CD3DX12_PIPELINE_STATE_STREAM1 { CD3DX12_PIPELINE_STATE_STREAM1() = default; // Mesh and amplification shaders must be set manually, since they do not have representation in D3D12_GRAPHICS_PIPELINE_STATE_DESC CD3DX12_PIPELINE_STATE_STREAM1(const D3D12_GRAPHICS_PIPELINE_STATE_DESC& Desc) : Flags(Desc.Flags) , NodeMask(Desc.NodeMask) , pRootSignature(Desc.pRootSignature) , InputLayout(Desc.InputLayout) , IBStripCutValue(Desc.IBStripCutValue) , PrimitiveTopologyType(Desc.PrimitiveTopologyType) , VS(Desc.VS) , GS(Desc.GS) , StreamOutput(Desc.StreamOutput) , HS(Desc.HS) , DS(Desc.DS) , PS(Desc.PS) , AS() , MS() , BlendState(CD3DX12_BLEND_DESC(Desc.BlendState)) , DepthStencilState(CD3DX12_DEPTH_STENCIL_DESC1(Desc.DepthStencilState)) , DSVFormat(Desc.DSVFormat) , RasterizerState(CD3DX12_RASTERIZER_DESC(Desc.RasterizerState)) , RTVFormats(CD3DX12_RT_FORMAT_ARRAY(Desc.RTVFormats, Desc.NumRenderTargets)) , SampleDesc(Desc.SampleDesc) , SampleMask(Desc.SampleMask) , CachedPSO(Desc.CachedPSO) , ViewInstancingDesc(CD3DX12_VIEW_INSTANCING_DESC(CD3DX12_DEFAULT())) {} CD3DX12_PIPELINE_STATE_STREAM1(const D3DX12_MESH_SHADER_PIPELINE_STATE_DESC& Desc) : Flags(Desc.Flags) , NodeMask(Desc.NodeMask) , pRootSignature(Desc.pRootSignature) , IBStripCutValue(Desc.IBStripCutValue) , PrimitiveTopologyType(Desc.PrimitiveTopologyType) , PS(Desc.PS) , AS() , MS() , BlendState(CD3DX12_BLEND_DESC(Desc.BlendState)) , DepthStencilState(CD3DX12_DEPTH_STENCIL_DESC1(Desc.DepthStencilState)) , DSVFormat(Desc.DSVFormat) , RasterizerState(CD3DX12_RASTERIZER_DESC(Desc.RasterizerState)) , RTVFormats(CD3DX12_RT_FORMAT_ARRAY(Desc.RTVFormats, Desc.NumRenderTargets)) , SampleDesc(Desc.SampleDesc) , SampleMask(Desc.SampleMask) , CachedPSO(Desc.CachedPSO) , ViewInstancingDesc(CD3DX12_VIEW_INSTANCING_DESC(CD3DX12_DEFAULT())) {} CD3DX12_PIPELINE_STATE_STREAM1(const D3D12_COMPUTE_PIPELINE_STATE_DESC& Desc) : Flags(Desc.Flags) , NodeMask(Desc.NodeMask) , pRootSignature(Desc.pRootSignature) , 
CS(CD3DX12_SHADER_BYTECODE(Desc.CS)) , CachedPSO(Desc.CachedPSO) { static_cast(DepthStencilState).DepthEnable = false; } CD3DX12_PIPELINE_STATE_STREAM_FLAGS Flags; CD3DX12_PIPELINE_STATE_STREAM_NODE_MASK NodeMask; CD3DX12_PIPELINE_STATE_STREAM_ROOT_SIGNATURE pRootSignature; CD3DX12_PIPELINE_STATE_STREAM_INPUT_LAYOUT InputLayout; CD3DX12_PIPELINE_STATE_STREAM_IB_STRIP_CUT_VALUE IBStripCutValue; CD3DX12_PIPELINE_STATE_STREAM_PRIMITIVE_TOPOLOGY PrimitiveTopologyType; CD3DX12_PIPELINE_STATE_STREAM_VS VS; CD3DX12_PIPELINE_STATE_STREAM_GS GS; CD3DX12_PIPELINE_STATE_STREAM_STREAM_OUTPUT StreamOutput; CD3DX12_PIPELINE_STATE_STREAM_HS HS; CD3DX12_PIPELINE_STATE_STREAM_DS DS; CD3DX12_PIPELINE_STATE_STREAM_PS PS; CD3DX12_PIPELINE_STATE_STREAM_AS AS; CD3DX12_PIPELINE_STATE_STREAM_MS MS; CD3DX12_PIPELINE_STATE_STREAM_CS CS; CD3DX12_PIPELINE_STATE_STREAM_BLEND_DESC BlendState; CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL1 DepthStencilState; CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL_FORMAT DSVFormat; CD3DX12_PIPELINE_STATE_STREAM_RASTERIZER RasterizerState; CD3DX12_PIPELINE_STATE_STREAM_RENDER_TARGET_FORMATS RTVFormats; CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_DESC SampleDesc; CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_MASK SampleMask; CD3DX12_PIPELINE_STATE_STREAM_CACHED_PSO CachedPSO; CD3DX12_PIPELINE_STATE_STREAM_VIEW_INSTANCING ViewInstancingDesc; D3D12_GRAPHICS_PIPELINE_STATE_DESC GraphicsDescV0() const { D3D12_GRAPHICS_PIPELINE_STATE_DESC D; D.Flags = this->Flags; D.NodeMask = this->NodeMask; D.pRootSignature = this->pRootSignature; D.InputLayout = this->InputLayout; D.IBStripCutValue = this->IBStripCutValue; D.PrimitiveTopologyType = this->PrimitiveTopologyType; D.VS = this->VS; D.GS = this->GS; D.StreamOutput = this->StreamOutput; D.HS = this->HS; D.DS = this->DS; D.PS = this->PS; D.BlendState = this->BlendState; D.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC1(D3D12_DEPTH_STENCIL_DESC1(this->DepthStencilState)); D.DSVFormat = this->DSVFormat; D.RasterizerState = 
this->RasterizerState; D.NumRenderTargets = D3D12_RT_FORMAT_ARRAY(this->RTVFormats).NumRenderTargets; memcpy(D.RTVFormats, D3D12_RT_FORMAT_ARRAY(this->RTVFormats).RTFormats, sizeof(D.RTVFormats)); D.SampleDesc = this->SampleDesc; D.SampleMask = this->SampleMask; D.CachedPSO = this->CachedPSO; return D; } D3D12_COMPUTE_PIPELINE_STATE_DESC ComputeDescV0() const { D3D12_COMPUTE_PIPELINE_STATE_DESC D; D.Flags = this->Flags; D.NodeMask = this->NodeMask; D.pRootSignature = this->pRootSignature; D.CS = this->CS; D.CachedPSO = this->CachedPSO; return D; } }; struct CD3DX12_PIPELINE_MESH_STATE_STREAM { CD3DX12_PIPELINE_MESH_STATE_STREAM() = default; CD3DX12_PIPELINE_MESH_STATE_STREAM(const D3DX12_MESH_SHADER_PIPELINE_STATE_DESC& Desc) : Flags(Desc.Flags) , NodeMask(Desc.NodeMask) , pRootSignature(Desc.pRootSignature) , PS(Desc.PS) , AS() , MS() , BlendState(CD3DX12_BLEND_DESC(Desc.BlendState)) , DepthStencilState(CD3DX12_DEPTH_STENCIL_DESC1(Desc.DepthStencilState)) , DSVFormat(Desc.DSVFormat) , RasterizerState(CD3DX12_RASTERIZER_DESC(Desc.RasterizerState)) , RTVFormats(CD3DX12_RT_FORMAT_ARRAY(Desc.RTVFormats, Desc.NumRenderTargets)) , SampleDesc(Desc.SampleDesc) , SampleMask(Desc.SampleMask) , CachedPSO(Desc.CachedPSO) , ViewInstancingDesc(CD3DX12_VIEW_INSTANCING_DESC(CD3DX12_DEFAULT())) {} CD3DX12_PIPELINE_STATE_STREAM_FLAGS Flags; CD3DX12_PIPELINE_STATE_STREAM_NODE_MASK NodeMask; CD3DX12_PIPELINE_STATE_STREAM_ROOT_SIGNATURE pRootSignature; CD3DX12_PIPELINE_STATE_STREAM_PS PS; CD3DX12_PIPELINE_STATE_STREAM_AS AS; CD3DX12_PIPELINE_STATE_STREAM_MS MS; CD3DX12_PIPELINE_STATE_STREAM_BLEND_DESC BlendState; CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL1 DepthStencilState; CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL_FORMAT DSVFormat; CD3DX12_PIPELINE_STATE_STREAM_RASTERIZER RasterizerState; CD3DX12_PIPELINE_STATE_STREAM_RENDER_TARGET_FORMATS RTVFormats; CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_DESC SampleDesc; CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_MASK SampleMask; 
CD3DX12_PIPELINE_STATE_STREAM_CACHED_PSO CachedPSO; CD3DX12_PIPELINE_STATE_STREAM_VIEW_INSTANCING ViewInstancingDesc; D3DX12_MESH_SHADER_PIPELINE_STATE_DESC MeshShaderDescV0() const { D3DX12_MESH_SHADER_PIPELINE_STATE_DESC D; D.Flags = this->Flags; D.NodeMask = this->NodeMask; D.pRootSignature = this->pRootSignature; D.PS = this->PS; D.AS = this->AS; D.MS = this->MS; D.BlendState = this->BlendState; D.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC1(D3D12_DEPTH_STENCIL_DESC1(this->DepthStencilState)); D.DSVFormat = this->DSVFormat; D.RasterizerState = this->RasterizerState; D.NumRenderTargets = D3D12_RT_FORMAT_ARRAY(this->RTVFormats).NumRenderTargets; memcpy(D.RTVFormats, D3D12_RT_FORMAT_ARRAY(this->RTVFormats).RTFormats, sizeof(D.RTVFormats)); D.SampleDesc = this->SampleDesc; D.SampleMask = this->SampleMask; D.CachedPSO = this->CachedPSO; return D; } }; // CD3DX12_PIPELINE_STATE_STREAM works on RS2+ but does not support new subobject(s) added in RS3+. // See CD3DX12_PIPELINE_STATE_STREAM1 for instance. 
struct CD3DX12_PIPELINE_STATE_STREAM { CD3DX12_PIPELINE_STATE_STREAM() = default; CD3DX12_PIPELINE_STATE_STREAM(const D3D12_GRAPHICS_PIPELINE_STATE_DESC& Desc) : Flags(Desc.Flags) , NodeMask(Desc.NodeMask) , pRootSignature(Desc.pRootSignature) , InputLayout(Desc.InputLayout) , IBStripCutValue(Desc.IBStripCutValue) , PrimitiveTopologyType(Desc.PrimitiveTopologyType) , VS(Desc.VS) , GS(Desc.GS) , StreamOutput(Desc.StreamOutput) , HS(Desc.HS) , DS(Desc.DS) , PS(Desc.PS) , BlendState(CD3DX12_BLEND_DESC(Desc.BlendState)) , DepthStencilState(CD3DX12_DEPTH_STENCIL_DESC1(Desc.DepthStencilState)) , DSVFormat(Desc.DSVFormat) , RasterizerState(CD3DX12_RASTERIZER_DESC(Desc.RasterizerState)) , RTVFormats(CD3DX12_RT_FORMAT_ARRAY(Desc.RTVFormats, Desc.NumRenderTargets)) , SampleDesc(Desc.SampleDesc) , SampleMask(Desc.SampleMask) , CachedPSO(Desc.CachedPSO) {} CD3DX12_PIPELINE_STATE_STREAM(const D3D12_COMPUTE_PIPELINE_STATE_DESC& Desc) : Flags(Desc.Flags) , NodeMask(Desc.NodeMask) , pRootSignature(Desc.pRootSignature) , CS(CD3DX12_SHADER_BYTECODE(Desc.CS)) , CachedPSO(Desc.CachedPSO) {} CD3DX12_PIPELINE_STATE_STREAM_FLAGS Flags; CD3DX12_PIPELINE_STATE_STREAM_NODE_MASK NodeMask; CD3DX12_PIPELINE_STATE_STREAM_ROOT_SIGNATURE pRootSignature; CD3DX12_PIPELINE_STATE_STREAM_INPUT_LAYOUT InputLayout; CD3DX12_PIPELINE_STATE_STREAM_IB_STRIP_CUT_VALUE IBStripCutValue; CD3DX12_PIPELINE_STATE_STREAM_PRIMITIVE_TOPOLOGY PrimitiveTopologyType; CD3DX12_PIPELINE_STATE_STREAM_VS VS; CD3DX12_PIPELINE_STATE_STREAM_GS GS; CD3DX12_PIPELINE_STATE_STREAM_STREAM_OUTPUT StreamOutput; CD3DX12_PIPELINE_STATE_STREAM_HS HS; CD3DX12_PIPELINE_STATE_STREAM_DS DS; CD3DX12_PIPELINE_STATE_STREAM_PS PS; CD3DX12_PIPELINE_STATE_STREAM_CS CS; CD3DX12_PIPELINE_STATE_STREAM_BLEND_DESC BlendState; CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL1 DepthStencilState; CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL_FORMAT DSVFormat; CD3DX12_PIPELINE_STATE_STREAM_RASTERIZER RasterizerState; 
CD3DX12_PIPELINE_STATE_STREAM_RENDER_TARGET_FORMATS RTVFormats; CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_DESC SampleDesc; CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_MASK SampleMask; CD3DX12_PIPELINE_STATE_STREAM_CACHED_PSO CachedPSO; D3D12_GRAPHICS_PIPELINE_STATE_DESC GraphicsDescV0() const { D3D12_GRAPHICS_PIPELINE_STATE_DESC D; D.Flags = this->Flags; D.NodeMask = this->NodeMask; D.pRootSignature = this->pRootSignature; D.InputLayout = this->InputLayout; D.IBStripCutValue = this->IBStripCutValue; D.PrimitiveTopologyType = this->PrimitiveTopologyType; D.VS = this->VS; D.GS = this->GS; D.StreamOutput = this->StreamOutput; D.HS = this->HS; D.DS = this->DS; D.PS = this->PS; D.BlendState = this->BlendState; D.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC1(D3D12_DEPTH_STENCIL_DESC1(this->DepthStencilState)); D.DSVFormat = this->DSVFormat; D.RasterizerState = this->RasterizerState; D.NumRenderTargets = D3D12_RT_FORMAT_ARRAY(this->RTVFormats).NumRenderTargets; memcpy(D.RTVFormats, D3D12_RT_FORMAT_ARRAY(this->RTVFormats).RTFormats, sizeof(D.RTVFormats)); D.SampleDesc = this->SampleDesc; D.SampleMask = this->SampleMask; D.CachedPSO = this->CachedPSO; return D; } D3D12_COMPUTE_PIPELINE_STATE_DESC ComputeDescV0() const { D3D12_COMPUTE_PIPELINE_STATE_DESC D; D.Flags = this->Flags; D.NodeMask = this->NodeMask; D.pRootSignature = this->pRootSignature; D.CS = this->CS; D.CachedPSO = this->CachedPSO; return D; } }; struct CD3DX12_PIPELINE_STATE_STREAM_PARSE_HELPER : public ID3DX12PipelineParserCallbacks { CD3DX12_PIPELINE_STATE_STREAM1 PipelineStream; CD3DX12_PIPELINE_STATE_STREAM_PARSE_HELPER() noexcept : SeenDSS(false) { // Adjust defaults to account for absent members. PipelineStream.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; // Depth disabled if no DSV format specified. 
static_cast(PipelineStream.DepthStencilState).DepthEnable = false; } // ID3DX12PipelineParserCallbacks void FlagsCb(D3D12_PIPELINE_STATE_FLAGS Flags) override {PipelineStream.Flags = Flags;} void NodeMaskCb(UINT NodeMask) override {PipelineStream.NodeMask = NodeMask;} void RootSignatureCb(ID3D12RootSignature* pRootSignature) override {PipelineStream.pRootSignature = pRootSignature;} void InputLayoutCb(const D3D12_INPUT_LAYOUT_DESC& InputLayout) override {PipelineStream.InputLayout = InputLayout;} void IBStripCutValueCb(D3D12_INDEX_BUFFER_STRIP_CUT_VALUE IBStripCutValue) override {PipelineStream.IBStripCutValue = IBStripCutValue;} void PrimitiveTopologyTypeCb(D3D12_PRIMITIVE_TOPOLOGY_TYPE PrimitiveTopologyType) override {PipelineStream.PrimitiveTopologyType = PrimitiveTopologyType;} void VSCb(const D3D12_SHADER_BYTECODE& VS) override {PipelineStream.VS = VS;} void GSCb(const D3D12_SHADER_BYTECODE& GS) override {PipelineStream.GS = GS;} void StreamOutputCb(const D3D12_STREAM_OUTPUT_DESC& StreamOutput) override {PipelineStream.StreamOutput = StreamOutput;} void HSCb(const D3D12_SHADER_BYTECODE& HS) override {PipelineStream.HS = HS;} void DSCb(const D3D12_SHADER_BYTECODE& DS) override {PipelineStream.DS = DS;} void PSCb(const D3D12_SHADER_BYTECODE& PS) override {PipelineStream.PS = PS;} void CSCb(const D3D12_SHADER_BYTECODE& CS) override {PipelineStream.CS = CS;} void ASCb(const D3D12_SHADER_BYTECODE& AS) override {PipelineStream.AS = AS;} void MSCb(const D3D12_SHADER_BYTECODE& MS) override {PipelineStream.MS = MS;} void BlendStateCb(const D3D12_BLEND_DESC& BlendState) override {PipelineStream.BlendState = CD3DX12_BLEND_DESC(BlendState);} void DepthStencilStateCb(const D3D12_DEPTH_STENCIL_DESC& DepthStencilState) override { PipelineStream.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC1(DepthStencilState); SeenDSS = true; } void DepthStencilState1Cb(const D3D12_DEPTH_STENCIL_DESC1& DepthStencilState) override { PipelineStream.DepthStencilState = 
CD3DX12_DEPTH_STENCIL_DESC1(DepthStencilState); SeenDSS = true; } void DSVFormatCb(DXGI_FORMAT DSVFormat) override { PipelineStream.DSVFormat = DSVFormat; if (!SeenDSS && DSVFormat != DXGI_FORMAT_UNKNOWN) { // Re-enable depth for the default state. static_cast(PipelineStream.DepthStencilState).DepthEnable = true; } } void RasterizerStateCb(const D3D12_RASTERIZER_DESC& RasterizerState) override {PipelineStream.RasterizerState = CD3DX12_RASTERIZER_DESC(RasterizerState);} void RTVFormatsCb(const D3D12_RT_FORMAT_ARRAY& RTVFormats) override {PipelineStream.RTVFormats = RTVFormats;} void SampleDescCb(const DXGI_SAMPLE_DESC& SampleDesc) override {PipelineStream.SampleDesc = SampleDesc;} void SampleMaskCb(UINT SampleMask) override {PipelineStream.SampleMask = SampleMask;} void ViewInstancingCb(const D3D12_VIEW_INSTANCING_DESC& ViewInstancingDesc) override {PipelineStream.ViewInstancingDesc = CD3DX12_VIEW_INSTANCING_DESC(ViewInstancingDesc);} void CachedPSOCb(const D3D12_CACHED_PIPELINE_STATE& CachedPSO) override {PipelineStream.CachedPSO = CachedPSO;} private: bool SeenDSS; }; inline D3D12_PIPELINE_STATE_SUBOBJECT_TYPE D3DX12GetBaseSubobjectType(D3D12_PIPELINE_STATE_SUBOBJECT_TYPE SubobjectType) { switch (SubobjectType) { case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL1: return D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL; default: return SubobjectType; } } inline HRESULT D3DX12ParsePipelineStream(const D3D12_PIPELINE_STATE_STREAM_DESC& Desc, ID3DX12PipelineParserCallbacks* pCallbacks) { if (pCallbacks == nullptr) { return E_INVALIDARG; } if (Desc.SizeInBytes == 0 || Desc.pPipelineStateSubobjectStream == nullptr) { pCallbacks->ErrorBadInputParameter(1); // first parameter issue return E_INVALIDARG; } bool SubobjectSeen[D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_MAX_VALID] = {}; for (SIZE_T CurOffset = 0, SizeOfSubobject = 0; CurOffset < Desc.SizeInBytes; CurOffset += SizeOfSubobject) { BYTE* pStream = static_cast(Desc.pPipelineStateSubobjectStream)+CurOffset; auto 
SubobjectType = *reinterpret_cast(pStream); if (SubobjectType < 0 || SubobjectType >= D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_MAX_VALID) { pCallbacks->ErrorUnknownSubobject(SubobjectType); return E_INVALIDARG; } if (SubobjectSeen[D3DX12GetBaseSubobjectType(SubobjectType)]) { pCallbacks->ErrorDuplicateSubobject(SubobjectType); return E_INVALIDARG; // disallow subobject duplicates in a stream } SubobjectSeen[SubobjectType] = true; switch (SubobjectType) { case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ROOT_SIGNATURE: pCallbacks->RootSignatureCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::pRootSignature); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VS: pCallbacks->VSCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::VS); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PS: pCallbacks->PSCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::PS); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DS: pCallbacks->DSCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::DS); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_HS: pCallbacks->HSCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::HS); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_GS: pCallbacks->GSCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::GS); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_CS: pCallbacks->CSCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::CS); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_AS: pCallbacks->ASCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM1::AS); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_MS: pCallbacks->MSCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM1::MS); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_STREAM_OUTPUT: 
pCallbacks->StreamOutputCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::StreamOutput); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_BLEND: pCallbacks->BlendStateCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::BlendState); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_MASK: pCallbacks->SampleMaskCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::SampleMask); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER: pCallbacks->RasterizerStateCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::RasterizerState); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL: pCallbacks->DepthStencilStateCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL1: pCallbacks->DepthStencilState1Cb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::DepthStencilState); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_INPUT_LAYOUT: pCallbacks->InputLayoutCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::InputLayout); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_IB_STRIP_CUT_VALUE: pCallbacks->IBStripCutValueCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::IBStripCutValue); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PRIMITIVE_TOPOLOGY: pCallbacks->PrimitiveTopologyTypeCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::PrimitiveTopologyType); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RENDER_TARGET_FORMATS: pCallbacks->RTVFormatsCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::RTVFormats); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL_FORMAT: pCallbacks->DSVFormatCb(*reinterpret_cast(pStream)); 
SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::DSVFormat); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_DESC: pCallbacks->SampleDescCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::SampleDesc); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_NODE_MASK: pCallbacks->NodeMaskCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::NodeMask); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_CACHED_PSO: pCallbacks->CachedPSOCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::CachedPSO); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_FLAGS: pCallbacks->FlagsCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM::Flags); break; case D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_VIEW_INSTANCING: pCallbacks->ViewInstancingCb(*reinterpret_cast(pStream)); SizeOfSubobject = sizeof(CD3DX12_PIPELINE_STATE_STREAM1::ViewInstancingDesc); break; default: pCallbacks->ErrorUnknownSubobject(SubobjectType); return E_INVALIDARG; break; } } return S_OK; } //------------------------------------------------------------------------------------------------ inline bool operator==( const D3D12_CLEAR_VALUE &a, const D3D12_CLEAR_VALUE &b) { if (a.Format != b.Format) return false; if (a.Format == DXGI_FORMAT_D24_UNORM_S8_UINT || a.Format == DXGI_FORMAT_D16_UNORM || a.Format == DXGI_FORMAT_D32_FLOAT || a.Format == DXGI_FORMAT_D32_FLOAT_S8X24_UINT) { return (a.DepthStencil.Depth == b.DepthStencil.Depth) && (a.DepthStencil.Stencil == b.DepthStencil.Stencil); } else { return (a.Color[0] == b.Color[0]) && (a.Color[1] == b.Color[1]) && (a.Color[2] == b.Color[2]) && (a.Color[3] == b.Color[3]); } } inline bool operator==( const D3D12_RENDER_PASS_BEGINNING_ACCESS_CLEAR_PARAMETERS &a, const D3D12_RENDER_PASS_BEGINNING_ACCESS_CLEAR_PARAMETERS &b) { return a.ClearValue == b.ClearValue; } inline bool operator==( const 
D3D12_RENDER_PASS_ENDING_ACCESS_RESOLVE_PARAMETERS &a, const D3D12_RENDER_PASS_ENDING_ACCESS_RESOLVE_PARAMETERS &b) { if (a.pSrcResource != b.pSrcResource) return false; if (a.pDstResource != b.pDstResource) return false; if (a.SubresourceCount != b.SubresourceCount) return false; if (a.Format != b.Format) return false; if (a.ResolveMode != b.ResolveMode) return false; if (a.PreserveResolveSource != b.PreserveResolveSource) return false; return true; } inline bool operator==( const D3D12_RENDER_PASS_BEGINNING_ACCESS &a, const D3D12_RENDER_PASS_BEGINNING_ACCESS &b) { if (a.Type != b.Type) return false; if (a.Type == D3D12_RENDER_PASS_BEGINNING_ACCESS_TYPE_CLEAR && !(a.Clear == b.Clear)) return false; return true; } inline bool operator==( const D3D12_RENDER_PASS_ENDING_ACCESS &a, const D3D12_RENDER_PASS_ENDING_ACCESS &b) { if (a.Type != b.Type) return false; if (a.Type == D3D12_RENDER_PASS_ENDING_ACCESS_TYPE_RESOLVE && !(a.Resolve == b.Resolve)) return false; return true; } inline bool operator==( const D3D12_RENDER_PASS_RENDER_TARGET_DESC &a, const D3D12_RENDER_PASS_RENDER_TARGET_DESC &b) { if (a.cpuDescriptor.ptr != b.cpuDescriptor.ptr) return false; if (!(a.BeginningAccess == b.BeginningAccess)) return false; if (!(a.EndingAccess == b.EndingAccess)) return false; return true; } inline bool operator==( const D3D12_RENDER_PASS_DEPTH_STENCIL_DESC &a, const D3D12_RENDER_PASS_DEPTH_STENCIL_DESC &b) { if (a.cpuDescriptor.ptr != b.cpuDescriptor.ptr) return false; if (!(a.DepthBeginningAccess == b.DepthBeginningAccess)) return false; if (!(a.StencilBeginningAccess == b.StencilBeginningAccess)) return false; if (!(a.DepthEndingAccess == b.DepthEndingAccess)) return false; if (!(a.StencilEndingAccess == b.StencilEndingAccess)) return false; return true; } #ifndef D3DX12_NO_STATE_OBJECT_HELPERS //================================================================================================ // D3DX12 State Object Creation Helpers // // Helper classes for creating new style 
state objects out of an arbitrary set of subobjects. // Uses STL // // Start by instantiating CD3DX12_STATE_OBJECT_DESC (see it's public methods). // One of its methods is CreateSubobject(), which has a comment showing a couple of options for // defining subobjects using the helper classes for each subobject (CD3DX12_DXIL_LIBRARY_SUBOBJECT // etc.). The subobject helpers each have methods specific to the subobject for configuring it's // contents. // //================================================================================================ #include #include #include #include #ifndef D3DX12_USE_ATL #include #define D3DX12_COM_PTR Microsoft::WRL::ComPtr #define D3DX12_COM_PTR_GET(x) x.Get() #define D3DX12_COM_PTR_ADDRESSOF(x) x.GetAddressOf() #else #include #define D3DX12_COM_PTR ATL::CComPtr #define D3DX12_COM_PTR_GET(x) x.p #define D3DX12_COM_PTR_ADDRESSOF(x) &x.p #endif //------------------------------------------------------------------------------------------------ class CD3DX12_STATE_OBJECT_DESC { public: CD3DX12_STATE_OBJECT_DESC() { Init(D3D12_STATE_OBJECT_TYPE_COLLECTION); } CD3DX12_STATE_OBJECT_DESC(D3D12_STATE_OBJECT_TYPE Type) { Init(Type); } void SetStateObjectType(D3D12_STATE_OBJECT_TYPE Type) { m_Desc.Type = Type; } operator const D3D12_STATE_OBJECT_DESC&() { // Do final preparation work m_RepointedAssociations.clear(); m_SubobjectArray.clear(); m_SubobjectArray.reserve(m_Desc.NumSubobjects); // Flatten subobjects into an array (each flattened subobject still has a // member that's a pointer to it's desc that's not flattened) for (auto Iter = m_SubobjectList.begin(); Iter != m_SubobjectList.end(); Iter++) { m_SubobjectArray.push_back(*Iter); // Store new location in array so we can redirect pointers contained in subobjects Iter->pSubobjectArrayLocation = &m_SubobjectArray.back(); } // For subobjects with pointer fields, create a new copy of those subobject definitions // with fixed pointers for (UINT i = 0; i < m_Desc.NumSubobjects; i++) { if 
(m_SubobjectArray[i].Type == D3D12_STATE_SUBOBJECT_TYPE_SUBOBJECT_TO_EXPORTS_ASSOCIATION) { auto pOriginalSubobjectAssociation = reinterpret_cast(m_SubobjectArray[i].pDesc); D3D12_SUBOBJECT_TO_EXPORTS_ASSOCIATION Repointed = *pOriginalSubobjectAssociation; auto pWrapper = static_cast(pOriginalSubobjectAssociation->pSubobjectToAssociate); Repointed.pSubobjectToAssociate = pWrapper->pSubobjectArrayLocation; m_RepointedAssociations.push_back(Repointed); m_SubobjectArray[i].pDesc = &m_RepointedAssociations.back(); } } // Below: using ugly way to get pointer in case .data() is not defined m_Desc.pSubobjects = m_Desc.NumSubobjects ? &m_SubobjectArray[0] : nullptr; return m_Desc; } operator const D3D12_STATE_OBJECT_DESC*() { // Cast calls the above final preparation work return &static_cast(*this); } // CreateSubobject creates a sububject helper (e.g. CD3DX12_HIT_GROUP_SUBOBJECT) // whose lifetime is owned by this class. // e.g. // // CD3DX12_STATE_OBJECT_DESC Collection1(D3D12_STATE_OBJECT_TYPE_COLLECTION); // auto Lib0 = Collection1.CreateSubobject(); // Lib0->SetDXILLibrary(&pMyAppDxilLibs[0]); // Lib0->DefineExport(L"rayGenShader0"); // in practice these export listings might be // // data/engine driven // etc. // // Alternatively, users can instantiate sububject helpers explicitly, such as via local // variables instead, passing the state object desc that should point to it into the helper // constructor (or call mySubobjectHelper.AddToStateObject(Collection1)). // In this alternative scenario, the user must keep the subobject alive as long as the state // object it is associated with is alive, else it's pointer references will be stale. // e.g. // // CD3DX12_STATE_OBJECT_DESC RaytracingState2(D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE); // CD3DX12_DXIL_LIBRARY_SUBOBJECT LibA(RaytracingState2); // LibA.SetDXILLibrary(&pMyAppDxilLibs[4]); // not manually specifying exports // // - meaning all exports in the libraries // // are exported // etc. 
template T* CreateSubobject() { T* pSubobject = new T(*this); m_OwnedSubobjectHelpers.emplace_back(pSubobject); return pSubobject; } private: D3D12_STATE_SUBOBJECT* TrackSubobject(D3D12_STATE_SUBOBJECT_TYPE Type, void* pDesc) { SUBOBJECT_WRAPPER Subobject; Subobject.pSubobjectArrayLocation = nullptr; Subobject.Type = Type; Subobject.pDesc = pDesc; m_SubobjectList.push_back(Subobject); m_Desc.NumSubobjects++; return &m_SubobjectList.back(); } void Init(D3D12_STATE_OBJECT_TYPE Type) { SetStateObjectType(Type); m_Desc.pSubobjects = nullptr; m_Desc.NumSubobjects = 0; m_SubobjectList.clear(); m_SubobjectArray.clear(); m_RepointedAssociations.clear(); } typedef struct SUBOBJECT_WRAPPER : public D3D12_STATE_SUBOBJECT { D3D12_STATE_SUBOBJECT* pSubobjectArrayLocation; // new location when flattened into array // for repointing pointers in subobjects } SUBOBJECT_WRAPPER; D3D12_STATE_OBJECT_DESC m_Desc; std::list m_SubobjectList; // Pointers to list nodes handed out so // these can be edited live std::vector m_SubobjectArray; // Built at the end, copying list contents std::list m_RepointedAssociations; // subobject type that contains pointers to other subobjects, // repointed to flattened array class StringContainer { public: LPCWSTR LocalCopy(LPCWSTR string, bool bSingleString = false) { if (string) { if (bSingleString) { m_Strings.clear(); m_Strings.push_back(string); } else { m_Strings.push_back(string); } return m_Strings.back().c_str(); } else { return nullptr; } } void clear() { m_Strings.clear(); } private: std::list m_Strings; }; class SUBOBJECT_HELPER_BASE { public: SUBOBJECT_HELPER_BASE() { Init(); } virtual ~SUBOBJECT_HELPER_BASE() {} virtual D3D12_STATE_SUBOBJECT_TYPE Type() const = 0; void AddToStateObject(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject) { m_pSubobject = ContainingStateObject.TrackSubobject(Type(), Data()); } protected: virtual void* Data() = 0; void Init() { m_pSubobject = nullptr; } D3D12_STATE_SUBOBJECT* m_pSubobject; }; #if(__cplusplus >= 
201103L) std::list> m_OwnedSubobjectHelpers; #else class OWNED_HELPER { public: OWNED_HELPER(const SUBOBJECT_HELPER_BASE* pHelper) { m_pHelper = pHelper; } ~OWNED_HELPER() { delete m_pHelper; } const SUBOBJECT_HELPER_BASE* m_pHelper; }; std::list m_OwnedSubobjectHelpers; #endif friend class CD3DX12_DXIL_LIBRARY_SUBOBJECT; friend class CD3DX12_EXISTING_COLLECTION_SUBOBJECT; friend class CD3DX12_SUBOBJECT_TO_EXPORTS_ASSOCIATION_SUBOBJECT; friend class CD3DX12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION; friend class CD3DX12_HIT_GROUP_SUBOBJECT; friend class CD3DX12_RAYTRACING_SHADER_CONFIG_SUBOBJECT; friend class CD3DX12_RAYTRACING_PIPELINE_CONFIG_SUBOBJECT; friend class CD3DX12_RAYTRACING_PIPELINE_CONFIG1_SUBOBJECT; friend class CD3DX12_GLOBAL_ROOT_SIGNATURE_SUBOBJECT; friend class CD3DX12_LOCAL_ROOT_SIGNATURE_SUBOBJECT; friend class CD3DX12_STATE_OBJECT_CONFIG_SUBOBJECT; friend class CD3DX12_NODE_MASK_SUBOBJECT; }; //------------------------------------------------------------------------------------------------ class CD3DX12_DXIL_LIBRARY_SUBOBJECT : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE { public: CD3DX12_DXIL_LIBRARY_SUBOBJECT() { Init(); } CD3DX12_DXIL_LIBRARY_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject) { Init(); AddToStateObject(ContainingStateObject); } void SetDXILLibrary(D3D12_SHADER_BYTECODE*pCode) { static const D3D12_SHADER_BYTECODE Default = {}; m_Desc.DXILLibrary = pCode ? 
*pCode : Default; } void DefineExport( LPCWSTR Name, LPCWSTR ExportToRename = nullptr, D3D12_EXPORT_FLAGS Flags = D3D12_EXPORT_FLAG_NONE) { D3D12_EXPORT_DESC Export; Export.Name = m_Strings.LocalCopy(Name); Export.ExportToRename = m_Strings.LocalCopy(ExportToRename); Export.Flags = Flags; m_Exports.push_back(Export); m_Desc.pExports = &m_Exports[0]; // using ugly way to get pointer in case .data() is not defined m_Desc.NumExports = static_cast(m_Exports.size()); } template void DefineExports(LPCWSTR(&Exports)[N]) { for (UINT i = 0; i < N; i++) { DefineExport(Exports[i]); } } void DefineExports(LPCWSTR* Exports, UINT N) { for (UINT i = 0; i < N; i++) { DefineExport(Exports[i]); } } D3D12_STATE_SUBOBJECT_TYPE Type() const { return D3D12_STATE_SUBOBJECT_TYPE_DXIL_LIBRARY; } operator const D3D12_STATE_SUBOBJECT&() const { return *m_pSubobject; } operator const D3D12_DXIL_LIBRARY_DESC&() const { return m_Desc; } private: void Init() { SUBOBJECT_HELPER_BASE::Init(); m_Desc = {}; m_Strings.clear(); m_Exports.clear(); } void* Data() { return &m_Desc; } D3D12_DXIL_LIBRARY_DESC m_Desc; CD3DX12_STATE_OBJECT_DESC::StringContainer m_Strings; std::vector m_Exports; }; //------------------------------------------------------------------------------------------------ class CD3DX12_EXISTING_COLLECTION_SUBOBJECT : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE { public: CD3DX12_EXISTING_COLLECTION_SUBOBJECT() { Init(); } CD3DX12_EXISTING_COLLECTION_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject) { Init(); AddToStateObject(ContainingStateObject); } void SetExistingCollection(ID3D12StateObject*pExistingCollection) { m_Desc.pExistingCollection = pExistingCollection; m_CollectionRef = pExistingCollection; } void DefineExport( LPCWSTR Name, LPCWSTR ExportToRename = nullptr, D3D12_EXPORT_FLAGS Flags = D3D12_EXPORT_FLAG_NONE) { D3D12_EXPORT_DESC Export; Export.Name = m_Strings.LocalCopy(Name); Export.ExportToRename = m_Strings.LocalCopy(ExportToRename); Export.Flags = 
Flags; m_Exports.push_back(Export); m_Desc.pExports = &m_Exports[0]; // using ugly way to get pointer in case .data() is not defined m_Desc.NumExports = static_cast(m_Exports.size()); } template void DefineExports(LPCWSTR(&Exports)[N]) { for (UINT i = 0; i < N; i++) { DefineExport(Exports[i]); } } void DefineExports(LPCWSTR* Exports, UINT N) { for (UINT i = 0; i < N; i++) { DefineExport(Exports[i]); } } D3D12_STATE_SUBOBJECT_TYPE Type() const { return D3D12_STATE_SUBOBJECT_TYPE_EXISTING_COLLECTION; } operator const D3D12_STATE_SUBOBJECT&() const { return *m_pSubobject; } operator const D3D12_EXISTING_COLLECTION_DESC&() const { return m_Desc; } private: void Init() { SUBOBJECT_HELPER_BASE::Init(); m_Desc = {}; m_CollectionRef = nullptr; m_Strings.clear(); m_Exports.clear(); } void* Data() { return &m_Desc; } D3D12_EXISTING_COLLECTION_DESC m_Desc; D3DX12_COM_PTR m_CollectionRef; CD3DX12_STATE_OBJECT_DESC::StringContainer m_Strings; std::vector m_Exports; }; //------------------------------------------------------------------------------------------------ class CD3DX12_SUBOBJECT_TO_EXPORTS_ASSOCIATION_SUBOBJECT : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE { public: CD3DX12_SUBOBJECT_TO_EXPORTS_ASSOCIATION_SUBOBJECT() { Init(); } CD3DX12_SUBOBJECT_TO_EXPORTS_ASSOCIATION_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject) { Init(); AddToStateObject(ContainingStateObject); } void SetSubobjectToAssociate(const D3D12_STATE_SUBOBJECT& SubobjectToAssociate) { m_Desc.pSubobjectToAssociate = &SubobjectToAssociate; } void AddExport(LPCWSTR Export) { m_Desc.NumExports++; m_Exports.push_back(m_Strings.LocalCopy(Export)); m_Desc.pExports = &m_Exports[0]; // using ugly way to get pointer in case .data() is not defined } template void AddExports(LPCWSTR (&Exports)[N]) { for (UINT i = 0; i < N; i++) { AddExport(Exports[i]); } } void AddExports(LPCWSTR* Exports, UINT N) { for (UINT i = 0; i < N; i++) { AddExport(Exports[i]); } } D3D12_STATE_SUBOBJECT_TYPE Type() 
const { return D3D12_STATE_SUBOBJECT_TYPE_SUBOBJECT_TO_EXPORTS_ASSOCIATION; } operator const D3D12_STATE_SUBOBJECT&() const { return *m_pSubobject; } operator const D3D12_SUBOBJECT_TO_EXPORTS_ASSOCIATION&() const { return m_Desc; } private: void Init() { SUBOBJECT_HELPER_BASE::Init(); m_Desc = {}; m_Strings.clear(); m_Exports.clear(); } void* Data() { return &m_Desc; } D3D12_SUBOBJECT_TO_EXPORTS_ASSOCIATION m_Desc; CD3DX12_STATE_OBJECT_DESC::StringContainer m_Strings; std::vector m_Exports; }; //------------------------------------------------------------------------------------------------ class CD3DX12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE { public: CD3DX12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION() { Init(); } CD3DX12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject) { Init(); AddToStateObject(ContainingStateObject); } void SetSubobjectNameToAssociate(LPCWSTR SubobjectToAssociate) { m_Desc.SubobjectToAssociate = m_SubobjectName.LocalCopy(SubobjectToAssociate, true); } void AddExport(LPCWSTR Export) { m_Desc.NumExports++; m_Exports.push_back(m_Strings.LocalCopy(Export)); m_Desc.pExports = &m_Exports[0]; // using ugly way to get pointer in case .data() is not defined } template void AddExports(LPCWSTR (&Exports)[N]) { for (UINT i = 0; i < N; i++) { AddExport(Exports[i]); } } void AddExports(LPCWSTR* Exports, UINT N) { for (UINT i = 0; i < N; i++) { AddExport(Exports[i]); } } D3D12_STATE_SUBOBJECT_TYPE Type() const { return D3D12_STATE_SUBOBJECT_TYPE_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION; } operator const D3D12_STATE_SUBOBJECT&() const { return *m_pSubobject; } operator const D3D12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION&() const { return m_Desc; } private: void Init() { SUBOBJECT_HELPER_BASE::Init(); m_Desc = {}; m_Strings.clear(); m_SubobjectName.clear(); m_Exports.clear(); } void* Data() { return &m_Desc; } D3D12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION m_Desc; 
CD3DX12_STATE_OBJECT_DESC::StringContainer m_Strings; CD3DX12_STATE_OBJECT_DESC::StringContainer m_SubobjectName; std::vector m_Exports; }; //------------------------------------------------------------------------------------------------ class CD3DX12_HIT_GROUP_SUBOBJECT : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE { public: CD3DX12_HIT_GROUP_SUBOBJECT() { Init(); } CD3DX12_HIT_GROUP_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject) { Init(); AddToStateObject(ContainingStateObject); } void SetHitGroupExport(LPCWSTR exportName) { m_Desc.HitGroupExport = m_Strings[0].LocalCopy(exportName, true); } void SetHitGroupType(D3D12_HIT_GROUP_TYPE Type) { m_Desc.Type = Type; } void SetAnyHitShaderImport(LPCWSTR importName) { m_Desc.AnyHitShaderImport = m_Strings[1].LocalCopy(importName, true); } void SetClosestHitShaderImport(LPCWSTR importName) { m_Desc.ClosestHitShaderImport = m_Strings[2].LocalCopy(importName, true); } void SetIntersectionShaderImport(LPCWSTR importName) { m_Desc.IntersectionShaderImport = m_Strings[3].LocalCopy(importName, true); } D3D12_STATE_SUBOBJECT_TYPE Type() const { return D3D12_STATE_SUBOBJECT_TYPE_HIT_GROUP; } operator const D3D12_STATE_SUBOBJECT&() const { return *m_pSubobject; } operator const D3D12_HIT_GROUP_DESC&() const { return m_Desc; } private: void Init() { SUBOBJECT_HELPER_BASE::Init(); m_Desc = {}; for (UINT i = 0; i < m_NumStrings; i++) { m_Strings[i].clear(); } } void* Data() { return &m_Desc; } D3D12_HIT_GROUP_DESC m_Desc; static const UINT m_NumStrings = 4; CD3DX12_STATE_OBJECT_DESC::StringContainer m_Strings[m_NumStrings]; // one string for every entrypoint name }; //------------------------------------------------------------------------------------------------ class CD3DX12_RAYTRACING_SHADER_CONFIG_SUBOBJECT : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE { public: CD3DX12_RAYTRACING_SHADER_CONFIG_SUBOBJECT() { Init(); } CD3DX12_RAYTRACING_SHADER_CONFIG_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& 
ContainingStateObject) { Init(); AddToStateObject(ContainingStateObject); } void Config(UINT MaxPayloadSizeInBytes, UINT MaxAttributeSizeInBytes) { m_Desc.MaxPayloadSizeInBytes = MaxPayloadSizeInBytes; m_Desc.MaxAttributeSizeInBytes = MaxAttributeSizeInBytes; } D3D12_STATE_SUBOBJECT_TYPE Type() const { return D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_SHADER_CONFIG; } operator const D3D12_STATE_SUBOBJECT&() const { return *m_pSubobject; } operator const D3D12_RAYTRACING_SHADER_CONFIG&() const { return m_Desc; } private: void Init() { SUBOBJECT_HELPER_BASE::Init(); m_Desc = {}; } void* Data() { return &m_Desc; } D3D12_RAYTRACING_SHADER_CONFIG m_Desc; }; //------------------------------------------------------------------------------------------------ class CD3DX12_RAYTRACING_PIPELINE_CONFIG_SUBOBJECT : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE { public: CD3DX12_RAYTRACING_PIPELINE_CONFIG_SUBOBJECT() { Init(); } CD3DX12_RAYTRACING_PIPELINE_CONFIG_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject) { Init(); AddToStateObject(ContainingStateObject); } void Config(UINT MaxTraceRecursionDepth) { m_Desc.MaxTraceRecursionDepth = MaxTraceRecursionDepth; } D3D12_STATE_SUBOBJECT_TYPE Type() const { return D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_PIPELINE_CONFIG; } operator const D3D12_STATE_SUBOBJECT&() const { return *m_pSubobject; } operator const D3D12_RAYTRACING_PIPELINE_CONFIG&() const { return m_Desc; } private: void Init() { SUBOBJECT_HELPER_BASE::Init(); m_Desc = {}; } void* Data() { return &m_Desc; } D3D12_RAYTRACING_PIPELINE_CONFIG m_Desc; }; //------------------------------------------------------------------------------------------------ class CD3DX12_RAYTRACING_PIPELINE_CONFIG1_SUBOBJECT : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE { public: CD3DX12_RAYTRACING_PIPELINE_CONFIG1_SUBOBJECT() { Init(); } CD3DX12_RAYTRACING_PIPELINE_CONFIG1_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject) { Init(); 
AddToStateObject(ContainingStateObject); } void Config(UINT MaxTraceRecursionDepth, D3D12_RAYTRACING_PIPELINE_FLAGS Flags) { m_Desc.MaxTraceRecursionDepth = MaxTraceRecursionDepth; m_Desc.Flags = Flags; } D3D12_STATE_SUBOBJECT_TYPE Type() const { return D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_PIPELINE_CONFIG1; } operator const D3D12_STATE_SUBOBJECT&() const { return *m_pSubobject; } operator const D3D12_RAYTRACING_PIPELINE_CONFIG1&() const { return m_Desc; } private: void Init() { SUBOBJECT_HELPER_BASE::Init(); m_Desc = {}; } void* Data() { return &m_Desc; } D3D12_RAYTRACING_PIPELINE_CONFIG1 m_Desc; }; //------------------------------------------------------------------------------------------------ class CD3DX12_GLOBAL_ROOT_SIGNATURE_SUBOBJECT : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE { public: CD3DX12_GLOBAL_ROOT_SIGNATURE_SUBOBJECT() { Init(); } CD3DX12_GLOBAL_ROOT_SIGNATURE_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject) { Init(); AddToStateObject(ContainingStateObject); } void SetRootSignature(ID3D12RootSignature* pRootSig) { m_pRootSig = pRootSig; } D3D12_STATE_SUBOBJECT_TYPE Type() const { return D3D12_STATE_SUBOBJECT_TYPE_GLOBAL_ROOT_SIGNATURE; } operator const D3D12_STATE_SUBOBJECT&() const { return *m_pSubobject; } operator ID3D12RootSignature*() const { return D3DX12_COM_PTR_GET(m_pRootSig); } private: void Init() { SUBOBJECT_HELPER_BASE::Init(); m_pRootSig = nullptr; } void* Data() { return D3DX12_COM_PTR_ADDRESSOF(m_pRootSig); } D3DX12_COM_PTR m_pRootSig; }; //------------------------------------------------------------------------------------------------ class CD3DX12_LOCAL_ROOT_SIGNATURE_SUBOBJECT : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE { public: CD3DX12_LOCAL_ROOT_SIGNATURE_SUBOBJECT() { Init(); } CD3DX12_LOCAL_ROOT_SIGNATURE_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject) { Init(); AddToStateObject(ContainingStateObject); } void SetRootSignature(ID3D12RootSignature* pRootSig) { m_pRootSig = 
pRootSig; } D3D12_STATE_SUBOBJECT_TYPE Type() const { return D3D12_STATE_SUBOBJECT_TYPE_LOCAL_ROOT_SIGNATURE; } operator const D3D12_STATE_SUBOBJECT&() const { return *m_pSubobject; } operator ID3D12RootSignature*() const { return D3DX12_COM_PTR_GET(m_pRootSig); } private: void Init() { SUBOBJECT_HELPER_BASE::Init(); m_pRootSig = nullptr; } void* Data() { return D3DX12_COM_PTR_ADDRESSOF(m_pRootSig); } D3DX12_COM_PTR m_pRootSig; }; //------------------------------------------------------------------------------------------------ class CD3DX12_STATE_OBJECT_CONFIG_SUBOBJECT : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE { public: CD3DX12_STATE_OBJECT_CONFIG_SUBOBJECT() { Init(); } CD3DX12_STATE_OBJECT_CONFIG_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject) { Init(); AddToStateObject(ContainingStateObject); } void SetFlags(D3D12_STATE_OBJECT_FLAGS Flags) { m_Desc.Flags = Flags; } D3D12_STATE_SUBOBJECT_TYPE Type() const { return D3D12_STATE_SUBOBJECT_TYPE_STATE_OBJECT_CONFIG; } operator const D3D12_STATE_SUBOBJECT&() const { return *m_pSubobject; } operator const D3D12_STATE_OBJECT_CONFIG&() const { return m_Desc; } private: void Init() { SUBOBJECT_HELPER_BASE::Init(); m_Desc = {}; } void* Data() { return &m_Desc; } D3D12_STATE_OBJECT_CONFIG m_Desc; }; //------------------------------------------------------------------------------------------------ class CD3DX12_NODE_MASK_SUBOBJECT : public CD3DX12_STATE_OBJECT_DESC::SUBOBJECT_HELPER_BASE { public: CD3DX12_NODE_MASK_SUBOBJECT() { Init(); } CD3DX12_NODE_MASK_SUBOBJECT(CD3DX12_STATE_OBJECT_DESC& ContainingStateObject) { Init(); AddToStateObject(ContainingStateObject); } void SetNodeMask(UINT NodeMask) { m_Desc.NodeMask = NodeMask; } D3D12_STATE_SUBOBJECT_TYPE Type() const { return D3D12_STATE_SUBOBJECT_TYPE_NODE_MASK; } operator const D3D12_STATE_SUBOBJECT&() const { return *m_pSubobject; } operator const D3D12_NODE_MASK&() const { return m_Desc; } private: void Init() { 
SUBOBJECT_HELPER_BASE::Init();
        m_Desc = {};
    }
    void* Data() { return &m_Desc; }
    D3D12_NODE_MASK m_Desc;
};

#undef D3DX12_COM_PTR
#undef D3DX12_COM_PTR_GET
#undef D3DX12_COM_PTR_ADDRESSOF

#endif // #ifndef D3DX12_NO_STATE_OBJECT_HELPERS

#endif // defined( __cplusplus )

#endif //__D3DX12_H__

================================================
FILE: include/Allocator.h
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once

namespace D3D12TranslationLayer
{
    // Block descriptor that can additionally carry a pointer to a directly allocated resource.
    // NOTE(review): the base class name appears without template arguments in this text; they
    // may have been lost in extraction — confirm against the original header.
    class HeapSuballocationBlock : public BlockAllocators::CGenericBlock
    {
    public:
        // Default block with no direct allocation attached.
        HeapSuballocationBlock() : BlockAllocators::CGenericBlock(), m_pDirectHeapAllocation(nullptr) {}
        // Block at newOffset/newSize; pResource, when given, marks it as a direct allocation.
        HeapSuballocationBlock(UINT64 newOffset, UINT64 newSize, ID3D12Resource *pResource = nullptr) :
            BlockAllocators::CGenericBlock(newOffset, newSize), m_pDirectHeapAllocation(pResource) {}

        // True when a resource pointer is attached (pointer converted to bool).
        bool IsDirectAllocation() const { return m_pDirectHeapAllocation; }
        // Returns the attached resource; asserts that the block is a direct allocation.
        ID3D12Resource *GetDirectHeapAllocation() const { assert(IsDirectAllocation()); return m_pDirectHeapAllocation; }
    private:
        ID3D12Resource *m_pDirectHeapAllocation;
    };

    class ImmediateContext; // Forward Declaration
    // Allocates and frees whole ID3D12Resource objects for a given context and heap type.
    // Allocate/Deallocate are only declared here; their definitions are not in this chunk.
    class InternalHeapAllocator
    {
    public:
        InternalHeapAllocator(ImmediateContext *pContext, AllocatorHeapType heapType) :
            m_pContext(pContext), m_HeapType(heapType) {}

        ID3D12Resource* Allocate(UINT64 size);
        void Deallocate(ID3D12Resource* pResource);
    private:
        ImmediateContext *m_pContext;
        AllocatorHeapType m_HeapType;
    };

    // Doesn't do any suballocation and instead just allocates a whole new
    // resource directly for each call to Allocate()
    // Note: Because it doesn't suballocate at all, it is thread safe by default
    // NOTE(review): no parameter list follows `template` here, yet _BlockType, _InnerAllocator
    // and _SizeType are used below; the list (and the arguments of std::forward /
    // std::invoke_result / the typedefs further down) look stripped — restore from the original.
    template
    class DirectAllocator
    {
    private:
        using InnerAllocatorDecayed = typename std::decay<_InnerAllocator>::type;
    public:
        // Forwards its arguments to the inner allocator's constructor.
        template
        DirectAllocator(InnerAllocatorArgs&&... innerArgs) : // throw(std::bad_alloc)
            m_InnerAllocator(std::forward(innerArgs)...)
        {}
        DirectAllocator() = default;
        DirectAllocator(DirectAllocator&&) = default;
        DirectAllocator& operator=(DirectAllocator&&) = default;

        // Wraps a fresh inner allocation in a block at offset 0 of the requested size.
        _BlockType Allocate(_SizeType size)
        {
            _BlockType block(0, size, m_InnerAllocator.Allocate(size));
            return block;
        }
        // Hands the block's direct allocation back to the inner allocator.
        void Deallocate(const _BlockType &block)
        {
            m_InnerAllocator.Deallocate(block.GetDirectHeapAllocation());
        }

        typedef typename std::invoke_result::type AllocationType;

        // Ownership test is purely "offset is zero"; it does not identify the allocator instance.
        inline bool IsOwner(_In_ const _BlockType &block) const { return block.GetOffset() == 0; }
        AllocationType GetInnerAllocation(const _BlockType &block) const { return block.GetDirectHeapAllocation(); }
        // Direct allocations always start at the beginning of their resource.
        _SizeType GetInnerAllocationOffset(const _BlockType &block) const { return 0; }
    private:
        _InnerAllocator m_InnerAllocator;
    };

    typedef DirectAllocator DirectHeapAllocator;
    typedef BlockAllocators::CDisjointBuddyAllocator DisjointBuddyHeapAllocator;

    // Wraps DisjointBuddyHeapAllocator (private inheritance) and takes m_Lock around
    // Allocate, Deallocate and GetInnerAllocation before forwarding to the base.
    class ThreadSafeBuddyHeapAllocator : DisjointBuddyHeapAllocator
    {
    public:
        // bNeedsThreadSafety is passed to the lock's constructor; the remaining arguments go
        // to the base allocator.
        template
        ThreadSafeBuddyHeapAllocator(UINT64 maxBlockSize, UINT64 threshold, bool bNeedsThreadSafety, InnerAllocatorArgs&&... innerArgs) : // throw(std::bad_alloc)
            DisjointBuddyHeapAllocator(maxBlockSize, threshold, std::forward(innerArgs)...),
            m_Lock(bNeedsThreadSafety)
        {}
        ThreadSafeBuddyHeapAllocator() = default;
        ThreadSafeBuddyHeapAllocator(ThreadSafeBuddyHeapAllocator&&) = default;
        ThreadSafeBuddyHeapAllocator& operator=(ThreadSafeBuddyHeapAllocator&&) = default;

        HeapSuballocationBlock Allocate(UINT64 size)
        {
            auto scopedLock = m_Lock.TakeLock();
            return DisjointBuddyHeapAllocator::Allocate(size);
        }

        void Deallocate(const HeapSuballocationBlock &block)
        {
            auto scopedLock = m_Lock.TakeLock();
            DisjointBuddyHeapAllocator::Deallocate(block);
        }

        auto GetInnerAllocation(const HeapSuballocationBlock &block) const
        {
            auto scopedLock = m_Lock.TakeLock();
            return DisjointBuddyHeapAllocator::GetInnerAllocation(block);
        }

        // Exposing methods that don't require locks.
        using DisjointBuddyHeapAllocator::IsOwner;

    private:
        OptLock<> m_Lock;
    };

    // Allocator that will conditionally choose to individually allocate resources or suballocate based on a
    // passed in function
    template
    class ConditionalAllocator
    {
    public:
        typedef bool(*RequiresDirectAllocationFunctionType)(typename _SizeType, typename AllocationArgs);

        template
        ConditionalAllocator(std::tuple suballocatedAllocatorArgs,
            std::tuple directHeapAllocatorArgs,
            RequiresDirectAllocationFunctionType pfnRequiresDirectHeapAllocator)
            : m_DirectAllocator(std::get(directHeapAllocatorArgs)...),
            m_SuballocationAllocator(std::get(suballocatedAllocatorArgs)...),
            m_pfnUseDirectHeapAllocator(pfnRequiresDirectHeapAllocator)
        {
        }

        _BlockType Allocate(_SizeType size, AllocationArgs args)
        {
            if (m_pfnUseDirectHeapAllocator(size, args)) { return m_DirectAllocator.Allocate(size); }
            else { return m_SuballocationAllocator.Allocate(size); }
        }

        void Deallocate(const _BlockType &block)
        {
            assert(IsOwner(block));
            if (block.IsDirectAllocation()) { m_DirectAllocator.Deallocate(block); }
            else { m_SuballocationAllocator.Deallocate(block); }
        }

        bool IsOwner(const _BlockType &block) const
        {
            if (block.IsDirectAllocation()) { return m_DirectAllocator.IsOwner(block); }
            else { return m_SuballocationAllocator.IsOwner(block); }
        }

        void Reset()
        {
            m_DirectAllocator.Reset();
            m_SuballocationAllocator.Reset();
        }

        auto GetInnerAllocation(const _BlockType &block) const
        {
            assert(IsOwner(block));
            if (block.IsDirectAllocation()) { return m_DirectAllocator.GetInnerAllocation(block); }
            else { return m_SuballocationAllocator.GetInnerAllocation(block); }
        }

        _SizeType GetInnerAllocationOffset(const _BlockType &block) const
        {
            assert(IsOwner(block));
            if (block.IsDirectAllocation()) { return m_DirectAllocator.GetInnerAllocationOffset(block); }
            else { return m_SuballocationAllocator.GetInnerAllocationOffset(block); }
        }
    private:
        SuballocationAllocator m_SuballocationAllocator;
        DirectAllocator m_DirectAllocator;
        RequiresDirectAllocationFunctionType
m_pfnUseDirectHeapAllocator; }; static constexpr UINT cBuddyAllocatorThreshold = 64 * 1024; } ================================================ FILE: include/BatchedContext.hpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once namespace D3D12TranslationLayer { class BatchedQuery; struct BatchedExtension { virtual void Dispatch(ImmediateContext& ImmCtx, const void* pData, size_t DataSize) = 0; }; class FreePageContainer { OptLock<> m_CS; void* m_FreePageHead = nullptr; public: class LockedAdder { std::unique_lock m_Lock; void*& m_FreePageHead; public: void AddPage(void* pPage) noexcept; LockedAdder(FreePageContainer& Container) noexcept : m_Lock(Container.m_CS.TakeLock()) , m_FreePageHead(Container.m_FreePageHead) { } }; FreePageContainer(bool bAllocCS) : m_CS(bAllocCS) {} ~FreePageContainer(); void* RemovePage() noexcept; }; class BatchedContext { private: // Gives a 0-indexed value to each command in the order they're declared. #define DECLARE_COMMAND_VALUE static constexpr UINT CmdValue = (__COUNTER__ - c_FirstCommandCounterValue); public: static constexpr size_t BatchSizeInBytes = 2048; struct BatchStorageAllocator { FreePageContainer* m_Container; void* operator()(bool bAllocSuccess) noexcept; }; using BatchPrimitive = UINT64; using BatchStorage = segmented_stack; static constexpr UINT c_MaxOutstandingBatches = 5; class Batch { friend class BatchedContext; uint64_t m_BatchID; BatchStorage m_BatchCommands; std::vector> m_PostBatchFunctions; UINT m_NumCommands; // Used to check GPU completion. Guarded by submission lock. 
UINT m_FlushRequestedMask = 0;

        Batch(BatchStorageAllocator const& allocator)
            : m_BatchCommands(std::nothrow, allocator)
        {
        }
        void Retire(FreePageContainer& FreePages) noexcept;
        void PrepareToSubmit(BatchStorage BatchCommands, std::vector> PostBatchFunctions, uint64_t BatchID, UINT NumCommands, bool bFlushImmCtxAfterBatch);
    };

    // NOTE(review): in this copy of the file most template parameter/argument lists
    // (after `template`, `reinterpret_cast`, `std::forward`, `std::vector`, ...) are
    // absent. They are left exactly as found; restore them from the original header.

    // Both overloads pass the pointer value through Align() with
    // sizeof(BatchPrimitive) as the alignment argument.
    static const void* AlignPtr(const void* pPtr) noexcept
    {
        return reinterpret_cast(Align(reinterpret_cast(pPtr), sizeof(BatchPrimitive)));
    }
    static void* AlignPtr(void* pPtr) noexcept
    {
        return reinterpret_cast(Align(reinterpret_cast(pPtr), sizeof(BatchPrimitive)));
    }

    // Note: This value could be anything, depending on how many times __COUNTER__ was instantiated.
    // But all further references of __COUNTER__ in this file subtract this value, thereby becoming
    // 0-based.
    static constexpr UINT c_FirstCommandCounterValue = __COUNTER__ + 1;

    // ---- Command payloads -------------------------------------------------------
    // Each struct below uses DECLARE_COMMAND_VALUE, so its CmdValue is fixed by its
    // position in this list: do not reorder, and do not add other uses of
    // __COUNTER__ between c_FirstCommandCounterValue and c_LastCommand.
    // "Followed by X[n]" comments describe trailing data that is not a struct member.

    struct CmdSetPipelineState
    {
        DECLARE_COMMAND_VALUE;
        PipelineState* pPSO;
    };
    struct CmdDrawInstanced
    {
        DECLARE_COMMAND_VALUE;
        UINT countPerInstance;
        UINT instanceCount;
        UINT vertexStart;
        UINT instanceStart;
    };
    struct CmdDrawIndexedInstanced
    {
        DECLARE_COMMAND_VALUE;
        UINT countPerInstance;
        UINT instanceCount;
        UINT indexStart;
        INT vertexStart;
        UINT instanceStart;
    };
    struct CmdDispatch
    {
        DECLARE_COMMAND_VALUE;
        UINT x, y, z;
    };
    struct CmdDrawAuto
    {
        DECLARE_COMMAND_VALUE;
    };
    struct CmdDrawInstancedIndirect
    {
        DECLARE_COMMAND_VALUE;
        Resource* pBuffer;
        UINT offset;
    };
    struct CmdDrawIndexedInstancedIndirect
    {
        DECLARE_COMMAND_VALUE;
        Resource* pBuffer;
        UINT offset;
    };
    struct CmdDispatchIndirect
    {
        DECLARE_COMMAND_VALUE;
        Resource* pBuffer;
        UINT offset;
    };
    struct CmdSetTopology
    {
        DECLARE_COMMAND_VALUE;
        D3D12_PRIMITIVE_TOPOLOGY topology;
    };
    struct CmdSetVertexBuffers
    {
        DECLARE_COMMAND_VALUE;
        UINT _CmdValue = CmdValue;
        UINT startSlot;
        UINT numVBs;
        CmdSetVertexBuffers(UINT _startSlot, UINT _numVBs, Resource* const* _ppVBs, UINT const* _pStrides, UINT const* _pOffsets);
        static size_t GetCommandSize(UINT, UINT _numVBs, Resource* const*, UINT const*, UINT const*);
        // Followed by Resource[numVBs], then UINT[numVBs], then UINT[numVBs]
    };
    struct CmdSetIndexBuffer
    {
        DECLARE_COMMAND_VALUE;
        Resource* pBuffer;
        DXGI_FORMAT format;
        UINT offset;
    };
    struct CmdSetShaderResources
    {
        DECLARE_COMMAND_VALUE;
        EShaderStage stage;
        UINT startSlot;
        UINT numSRVs;
        // Followed by SRV[numSRVs]
    };
    struct CmdSetSamplers
    {
        DECLARE_COMMAND_VALUE;
        EShaderStage stage;
        UINT startSlot;
        UINT numSamplers;
        // Followed by Sampler[numSamplers]
    };
    struct CmdSetConstantBuffers
    {
        DECLARE_COMMAND_VALUE;
        UINT _CmdValue = CmdValue;
        EShaderStage stage;
        UINT startSlot;
        UINT numCBs;
        CmdSetConstantBuffers(EShaderStage _stage, UINT _startSlot, UINT _numCBs, Resource* const* _ppCBs, UINT const* _pFirstConstant, UINT const* _pNumConstants);
        static size_t GetCommandSize(EShaderStage, UINT, UINT numCBs, Resource* const*, UINT const*, UINT const*);
        // Followed by Resource[numCBs], then UINT[numCBs], then UINT[numCBs]
    };
    struct CmdSetConstantBuffersNullOffsetSize
    {
        DECLARE_COMMAND_VALUE;
        EShaderStage stage;
        UINT startSlot;
        UINT numCBs;
        // Followed by Resource[numCBs]
    };
    struct CmdSetSOBuffers
    {
        DECLARE_COMMAND_VALUE;
        Resource* pBuffers[D3D11_SO_STREAM_COUNT];
        UINT offsets[D3D11_SO_STREAM_COUNT];
    };
    struct CmdSetRenderTargets
    {
        DECLARE_COMMAND_VALUE;
        RTV* pRTVs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT];
        DSV* pDSV;
    };
    struct CmdSetUAV
    {
        DECLARE_COMMAND_VALUE;
        bool graphics;
        UINT slot;
        UAV* pUAV;
        UINT initialCount;
    };
    struct CmdSetStencilRef
    {
        DECLARE_COMMAND_VALUE;
        UINT ref;
    };
    struct CmdSetBlendFactor
    {
        DECLARE_COMMAND_VALUE;
        float factor[4];
    };
    struct CmdSetViewport
    {
        DECLARE_COMMAND_VALUE;
        UINT slot;
        D3D12_VIEWPORT viewport;
    };
    struct CmdSetNumViewports
    {
        DECLARE_COMMAND_VALUE;
        UINT num;
    };
    struct CmdSetScissorRect
    {
        DECLARE_COMMAND_VALUE;
        UINT slot;
        D3D12_RECT rect;
    };
    struct CmdSetNumScissorRects
    {
        DECLARE_COMMAND_VALUE;
        UINT num;
    };
    struct CmdSetScissorEnable
    {
        DECLARE_COMMAND_VALUE;
        bool enable;
    };

    // Shared layout for the clear-view commands; it does not use
    // DECLARE_COMMAND_VALUE itself - the derived structs below do.
    template struct CmdClearView
    {
        View* pView;
        ColorType color[4];
        UINT numRects;
        // Followed by D3D12_RECT[numRects]
        CmdClearView(View* _pView, const ColorType _color[4], UINT _numRects)
            : pView(_pView), numRects(_numRects)
        {
            // Copies exactly four color components.
            std::copy(_color, _color + 4, color);
        }
    };
    struct CmdClearRenderTargetView : CmdClearView
    {
        DECLARE_COMMAND_VALUE;
        CmdClearRenderTargetView(RTV* _pView, const FLOAT _color[4], UINT _numRects)
            : CmdClearView(_pView, _color, _numRects)
        {
        }
    };
    struct CmdClearDepthStencilView
    {
        DECLARE_COMMAND_VALUE;
        DSV* pView;
        UINT flags;
        FLOAT depth;
        UINT8 stencil;
        UINT numRects;
        // Followed by D3D12_RECT[numRects]
    };
    struct CmdClearUnorderedAccessViewUint : CmdClearView
    {
        DECLARE_COMMAND_VALUE;
        CmdClearUnorderedAccessViewUint(UAV* _pView, const UINT _color[4], UINT _numRects)
            : CmdClearView(_pView, _color, _numRects)
        {
        }
    };
    struct CmdClearUnorderedAccessViewFloat : CmdClearView
    {
        DECLARE_COMMAND_VALUE;
        CmdClearUnorderedAccessViewFloat(UAV* _pView, const FLOAT _color[4], UINT _numRects)
            : CmdClearView(_pView, _color, _numRects)
        {
        }
    };
    struct CmdClearVideoDecoderOutputView : CmdClearView
    {
        DECLARE_COMMAND_VALUE;
        CmdClearVideoDecoderOutputView(VDOV* _pView, const FLOAT _color[4], UINT _numRects)
            : CmdClearView(_pView, _color, _numRects)
        {
        }
    };
    struct CmdClearVideoProcessorInputView : CmdClearView
    {
        DECLARE_COMMAND_VALUE;
        CmdClearVideoProcessorInputView(VPIV* _pView, const FLOAT _color[4], UINT _numRects)
            : CmdClearView(_pView, _color, _numRects)
        {
        }
    };
    struct CmdClearVideoProcessorOutputView : CmdClearView
    {
        DECLARE_COMMAND_VALUE;
        CmdClearVideoProcessorOutputView(VPOV* _pView, const FLOAT _color[4], UINT _numRects)
            : CmdClearView(_pView, _color, _numRects)
        {
        }
    };
    struct CmdDiscardView
    {
        DECLARE_COMMAND_VALUE;
        ViewBase* pView;
        UINT numRects;
        // Followed by D3D12_RECT[numRects]
    };
    struct CmdDiscardResource
    {
        DECLARE_COMMAND_VALUE;
        Resource* pResource;
        UINT numRects;
        // Followed by D3D12_RECT[numRects]
    };
    struct CmdGenMips
    {
        DECLARE_COMMAND_VALUE;
        SRV* pSRV;
        D3D12_FILTER_TYPE filterType;
    };
    struct CmdFinalizeUpdateSubresources
    {
        DECLARE_COMMAND_VALUE;
        Resource* pDst;
        ImmediateContext::PreparedUpdateSubresourcesOperation Op;
    };
    struct CmdFinalizeUpdateSubresourcesWithLocalPlacement
    {
        DECLARE_COMMAND_VALUE;
        Resource* pDst;
        ImmediateContext::PreparedUpdateSubresourcesOperationWithLocalPlacement Op;
    };
    struct CmdRename
    {
        DECLARE_COMMAND_VALUE;
        Resource* pResource;
        Resource* pRenameResource;
    };
    struct CmdRenameViaCopy
    {
        DECLARE_COMMAND_VALUE;
        Resource* pResource;
        Resource* pRenameResource;
        UINT dirtyPlaneMask;
    };
    struct CmdQueryBegin
    {
        DECLARE_COMMAND_VALUE;
        Async* pQuery;
    };
    struct CmdQueryEnd
    {
        DECLARE_COMMAND_VALUE;
        Async* pQuery;
    };
    struct CmdSetPredication
    {
        DECLARE_COMMAND_VALUE;
        Query* pPredicate;
        BOOL Value;
    };
    struct CmdResourceCopy
    {
        DECLARE_COMMAND_VALUE;
        Resource* pDst;
        Resource* pSrc;
    };
    struct CmdResolveSubresource
    {
        DECLARE_COMMAND_VALUE;
        Resource* pDst;
        Resource* pSrc;
        UINT DstSubresource;
        UINT SrcSubresource;
        DXGI_FORMAT Format;
    };
    struct CmdResourceCopyRegion
    {
        DECLARE_COMMAND_VALUE;
        Resource* pDst;
        Resource* pSrc;
        UINT DstSubresource;
        UINT SrcSubresource;
        UINT DstX, DstY, DstZ;
        D3D12_BOX SrcBox;
    };
    struct CmdSetResourceMinLOD
    {
        DECLARE_COMMAND_VALUE;
        Resource* pResource;
        FLOAT MinLOD;
    };
    struct CmdCopyStructureCount
    {
        DECLARE_COMMAND_VALUE;
        Resource* pDst;
        UAV* pSrc;
        UINT DstOffset;
    };
    struct CmdRotateResourceIdentities
    {
        DECLARE_COMMAND_VALUE;
        UINT NumResources;
        // Followed by Resource*[NumResources]
    };
    struct CmdExtension
    {
        DECLARE_COMMAND_VALUE;
        UINT _CmdValue = CmdValue;
        BatchedExtension* pExt;
        size_t DataSize;

        CmdExtension(BatchedExtension*, const void*, size_t);
        static size_t GetCommandSize(BatchedExtension*, const void*, size_t DataSize);
    };
    struct CmdSetHardwareProtection
    {
        DECLARE_COMMAND_VALUE;
        Resource* pResource;
        UINT Value;
    };
    struct CmdSetHardwareProtectionState
    {
        DECLARE_COMMAND_VALUE;
        BOOL State;
    };
    struct CmdClearState
    {
        DECLARE_COMMAND_VALUE;
    };
    struct CmdUpdateTileMappings
    {
        DECLARE_COMMAND_VALUE;
        UINT _CmdValue = CmdValue;
        UINT NumTiledResourceRegions;
        UINT NumRanges;
        ImmediateContext::TILE_MAPPING_FLAG Flags;
        Resource* pTiledResource;
        Resource* pTilePool;
        bool bTiledResourceRegionSizesPresent;
        bool bRangeFlagsPresent;
        bool bTilePoolStartOffsetsPresent;
        bool bRangeTileCountsPresent;

        CmdUpdateTileMappings(Resource*, UINT, const D3D12_TILED_RESOURCE_COORDINATE*, const D3D12_TILE_REGION_SIZE*, Resource*, UINT, const ImmediateContext::TILE_RANGE_FLAG*, const UINT*, const UINT*, ImmediateContext::TILE_MAPPING_FLAG);
        static size_t GetCommandSize(Resource*, UINT, const D3D12_TILED_RESOURCE_COORDINATE*, const D3D12_TILE_REGION_SIZE*, Resource*, UINT, const ImmediateContext::TILE_RANGE_FLAG*, const UINT*, const UINT*, ImmediateContext::TILE_MAPPING_FLAG);
        // Followed by D3D12_TILED_RESOURCE_COORDINATE[NumTiledResourceRegions], D3D12_TILE_REGION_SIZE[NumTiledResourceRegions],
        // TILE_RANGE_FLAG[NumRanges], UINT[NumRanges], UINT[NumRanges]
    };
    struct CmdCopyTileMappings
    {
        DECLARE_COMMAND_VALUE;
        Resource* pDstTiledResource;
        Resource* pSrcTiledResource;
        D3D12_TILED_RESOURCE_COORDINATE DstStartCoords;
        D3D12_TILED_RESOURCE_COORDINATE SrcStartCoords;
        D3D12_TILE_REGION_SIZE TileRegion;
        ImmediateContext::TILE_MAPPING_FLAG Flags;
    };
    struct CmdCopyTiles
    {
        DECLARE_COMMAND_VALUE;
        Resource* pResource;
        Resource* pBuffer;
        D3D12_TILED_RESOURCE_COORDINATE StartCoords;
        D3D12_TILE_REGION_SIZE TileRegion;
        UINT64 BufferOffset;
        ImmediateContext::TILE_COPY_FLAG Flags;
    };
    struct CmdTiledResourceBarrier
    {
        DECLARE_COMMAND_VALUE;
        Resource* pBefore;
        Resource* pAfter;
    };
    struct CmdResizeTilePool
    {
        DECLARE_COMMAND_VALUE;
        Resource* pTilePool;
        UINT64 NewSize;
    };
    struct CmdExecuteNestedBatch
    {
        DECLARE_COMMAND_VALUE;
        Batch* pBatch;
        BatchedContext* pThis;
    };
    struct CmdSetMarker
    {
        DECLARE_COMMAND_VALUE;
        UINT NumChars;
        // Followed by wchar_t[NumChars]
    };
    struct CmdBeginEvent
    {
        DECLARE_COMMAND_VALUE;
        UINT NumChars;
        // Followed by wchar_t[NumChars]
    };
    struct CmdEndEvent
    {
        DECLARE_COMMAND_VALUE;
    };

    // The 0-based value of the last command.
    static constexpr UINT c_LastCommand = __COUNTER__ - c_FirstCommandCounterValue - 1;

    // Construction-time options. The two flags also select whether m_RecordingLock
    // and m_SubmissionLock / m_FreePages are created with locking enabled (see the
    // member initializers further down).
    struct CreationArgs
    {
        bool CreatesAndDestroysAreMultithreaded : 1;
        bool SubmitBatchesToWorkerThread : 1;
        BatchedContext* pParentContext;
    };

    struct Callbacks
    {
        std::function ThreadErrorCallback;
        std::function PostSubmitCallback;
    };

    BatchedContext(ImmediateContext& ImmCtx, CreationArgs flags, Callbacks const& callbacks);
    ~BatchedContext();

    bool TRANSLATION_API ProcessBatch();
    bool TRANSLATION_API SubmitBatch(bool bFlushImmCtxAfterBatch = false);
    void TRANSLATION_API SubmitBatchIfIdle(bool bSkipFrequencyCheck = false);

    std::unique_ptr TRANSLATION_API FinishBatch(bool bFlushImmCtxAfterBatch = false);
    void TRANSLATION_API SubmitCommandListBatch(Batch*);
    void TRANSLATION_API RetireBatch(std::unique_ptr);

    // Calls ProcessBatch() first, then hands out the immediate context.
    ImmediateContext &FlushBatchAndGetImmediateContext()
    {
        ProcessBatch();
        return m_ImmCtx;
    }
    // Hands out the immediate context without touching the current batch.
    ImmediateContext &GetImmediateContextNoFlush()
    {
        return m_ImmCtx;
    }

    // Appends f to m_PostBatchFunctions under the recording lock.
    template void AddPostBatchFunction(TFunc&& f)
    {
        auto Lock = m_RecordingLock.TakeLock();
        m_PostBatchFunctions.emplace_back(std::forward(f));
    }

    // Defers `delete pObject` by queuing it as a post-batch function.
    template void TRANSLATION_API DeleteObject(T* pObject)
    {
        AddPostBatchFunction([pObject]() { delete pObject; });
    }

    // Queues pResource->Release() as a post-batch function and adds the resource's
    // size to m_PendingDestructionMemorySize. The batch is submitted immediately
    // once 64 * 1024 * 1024 bytes are pending or when the resource's parent reports
    // IsShared().
    void TRANSLATION_API ReleaseResource(Resource* pResource)
    {
        auto Lock = m_RecordingLock.TakeLock();
        auto Size = pResource->GetResourceSize();
        m_PostBatchFunctions.emplace_back([pResource]() { pResource->Release(); });
        m_PendingDestructionMemorySize += Size;
        if (m_PendingDestructionMemorySize >= 64 * 1024 * 1024 ||
            pResource->Parent()->IsShared())
        {
            // NOTE(review): SubmitBatch() is called while m_RecordingLock is still
            // held - confirm the lock type tolerates that.
            SubmitBatch();
        }
    }

    void TRANSLATION_API PostSubmit();

    void TRANSLATION_API SetPipelineState(PipelineState* pPipeline);

    void TRANSLATION_API DrawInstanced(UINT countPerInstance, UINT instanceCount, UINT vertexStart, UINT instanceStart);
    void TRANSLATION_API DrawIndexedInstanced(UINT countPerInstance, UINT instanceCount, UINT indexStart, INT vertexStart, UINT instanceStart);
    void TRANSLATION_API DrawAuto();
    void TRANSLATION_API DrawIndexedInstancedIndirect(Resource*, UINT offset);
    void TRANSLATION_API DrawInstancedIndirect(Resource*, UINT offset);

    void TRANSLATION_API Dispatch(UINT x, UINT y, UINT z);
    void TRANSLATION_API DispatchIndirect(Resource*, UINT offset);

    // Processes the current batch, then forwards to ImmediateContext::Flush.
    bool TRANSLATION_API Flush(UINT commandListMask)
    {
        return FlushBatchAndGetImmediateContext().Flush(commandListMask);
    }

    void TRANSLATION_API IaSetTopology(D3D12_PRIMITIVE_TOPOLOGY);
    void TRANSLATION_API IaSetVertexBuffers(UINT, __in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT) UINT, Resource**, const UINT*, const UINT*);
    void TRANSLATION_API IaSetIndexBuffer(Resource*, DXGI_FORMAT, UINT offset);

    template void TRANSLATION_API SetShaderResources(UINT, __in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT) UINT, SRV* const*);
    template void TRANSLATION_API SetSamplers(UINT, __in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT) UINT, Sampler**);
    template void TRANSLATION_API SetConstantBuffers(UINT, __in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_HW_SLOT_COUNT) UINT Buffers, Resource**, __in_ecount_opt(Buffers) CONST UINT* pFirstConstant, __in_ecount_opt(Buffers) CONST UINT* pNumConstants);

    void TRANSLATION_API SoSetTargets(_In_range_(0, 4) UINT NumTargets, _In_range_(0, 4) UINT, _In_reads_(NumTargets) Resource**, _In_reads_(NumTargets) const UINT*);
    void TRANSLATION_API OMSetRenderTargets(__in_ecount(NumRTVs) RTV** pRTVs, __in_range(0, 8) UINT NumRTVs, __in_opt DSV *, __in_ecount(NumUavs) UAV ** pUavs, CONST UINT*, UINT, __in_range(0, D3D11_1_UAV_SLOT_COUNT) UINT NumUavs);
    void TRANSLATION_API CsSetUnorderedAccessViews(UINT, __in_range(0, D3D11_1_UAV_SLOT_COUNT) UINT NumViews, __in_ecount(NumViews) UAV**, __in_ecount(NumViews) CONST UINT*);

    void TRANSLATION_API OMSetStencilRef(UINT);
    void TRANSLATION_API OMSetBlendFactor(const FLOAT[4]);

    void TRANSLATION_API SetViewports(UINT, const D3D12_VIEWPORT*);
    void TRANSLATION_API SetScissorRects(UINT, const D3D12_RECT*);
    void TRANSLATION_API SetScissorRectEnable(BOOL);

    void TRANSLATION_API ClearRenderTargetView(RTV *, CONST FLOAT[4], UINT NumRects, const D3D12_RECT *pRects);
    void TRANSLATION_API ClearDepthStencilView(DSV *, UINT, FLOAT, UINT8, UINT NumRects, const D3D12_RECT *pRects);
    void TRANSLATION_API ClearUnorderedAccessViewUint(UAV *, CONST UINT[4], UINT NumRects, const D3D12_RECT *pRects);
    void TRANSLATION_API ClearUnorderedAccessViewFloat(UAV *, CONST FLOAT[4], UINT NumRects, const D3D12_RECT *pRects);
    void TRANSLATION_API ClearVideoDecoderOutputView(VDOV *, CONST FLOAT[4], UINT NumRects, const D3D12_RECT *pRects);
    void TRANSLATION_API ClearVideoProcessorInputView(VPIV *, CONST FLOAT[4], UINT NumRects, const D3D12_RECT *pRects);
    void TRANSLATION_API ClearVideoProcessorOutputView(VPOV *, CONST FLOAT[4], UINT NumRects, const D3D12_RECT *pRects);

    void TRANSLATION_API DiscardView(ViewBase* pView, const D3D12_RECT*, UINT);
    void TRANSLATION_API DiscardResource(Resource* pResource, const D3D12_RECT*, UINT);

    void TRANSLATION_API GenMips(SRV *, D3D12_FILTER_TYPE FilterType);

    void TRANSLATION_API ResourceUpdateSubresourceUP(Resource* pResource, UINT DstSubresource, _In_opt_ const D3D12_BOX* pDstBox, _In_ const VOID* pMem, UINT SrcPitch, UINT SrcDepth);
    void TRANSLATION_API UploadInitialData(Resource* pDst, D3D12TranslationLayer::CSubresourceSubset const& Subresources, _In_reads_opt_(_Inexpressible_(Subresources.NumNonExtendedSubresources())) const D3D11_SUBRESOURCE_DATA* pSrcData, _In_opt_ const D3D12_BOX* pDstBox);

    void TRANSLATION_API QueryBegin(BatchedQuery*);
    void TRANSLATION_API QueryEnd(BatchedQuery*);
    bool TRANSLATION_API QueryGetData(BatchedQuery*, void*, UINT, bool DoNotFlush);
    void TRANSLATION_API SetPredication(Query*, BOOL);

    // Map methods
    // Make sure the batch is completed, then call into the immediate context to wait for the GPU.
    bool TRANSLATION_API MapUnderlyingSynchronize(BatchedResource* pResource, UINT Subresource, MAP_TYPE, bool, _In_opt_ const D3D12_BOX *pReadWriteRange, MappedSubresource*);
    // Make sure the batch is completed, then call into the immediate context to figure out how to do the map.
    bool TRANSLATION_API MapDefault(BatchedResource* pResource, UINT Subresource, MAP_TYPE, bool, _In_opt_ const D3D12_BOX *pReadWriteRange, MappedSubresource*);
    // Call thread-safe immediate context methods to acquire a mappable buffer, and queue a rename operation.
    bool TRANSLATION_API RenameAndMapBuffer(BatchedResource* pResource, MappedSubresource*);
    // Call thread-safe immediate context methods to acquire a mappable buffer - don't queue anything yet.
    bool TRANSLATION_API MapForRenameViaCopy(BatchedResource* pResource, UINT Subresource, MappedSubresource*);
    // Re-map the last-acquired buffer associated with a resource.
    bool TRANSLATION_API MapRenamedBuffer(BatchedResource* pResource, MappedSubresource*);

    // Unmap methods
    // Just unmap the renamed buffer.
    void TRANSLATION_API UnmapRenamedBuffer(BatchedResource* pResource, _In_opt_ const D3D12_BOX *pReadWriteRange);
    // Map enforced synchronization, just forward to immediate context.
    void TRANSLATION_API UnmapDefault(BatchedResource* pResource, UINT Subresource, _In_opt_ const D3D12_BOX *pReadWriteRange);
    // Map enforced synchronization, just forward to immediate context.
    void TRANSLATION_API UnmapStaging(BatchedResource* pResource, UINT Subresource, _In_opt_ const D3D12_BOX *pReadWriteRange);
    // Unmap the buffer and queue a rename-via-copy operation.
    void TRANSLATION_API UnmapAndRenameViaCopy(BatchedResource* pResource, UINT Subresource, _In_opt_ const D3D12_BOX *pReadWriteRange);

    void TRANSLATION_API ResourceCopy(Resource*, Resource*);
    void TRANSLATION_API ResourceResolveSubresource(Resource*, UINT, Resource*, UINT, DXGI_FORMAT);
    void TRANSLATION_API ResourceCopyRegion(Resource*, UINT, UINT, UINT, UINT, Resource*, UINT, const D3D12_BOX*);
    void TRANSLATION_API SetResourceMinLOD(Resource*, FLOAT);
    void TRANSLATION_API CopyStructureCount(Resource*, UINT, UAV*);

    void TRANSLATION_API UpdateTileMappings(Resource* hTiledResource,
        UINT NumTiledResourceRegions,
        _In_reads_(NumTiledResourceRegions) const D3D12_TILED_RESOURCE_COORDINATE* pTiledResourceRegionStartCoords,
        _In_reads_opt_(NumTiledResourceRegions) const D3D12_TILE_REGION_SIZE* pTiledResourceRegionSizes,
        Resource* hTilePool,
        UINT NumRanges,
        _In_reads_opt_(NumRanges) const ImmediateContext::TILE_RANGE_FLAG* pRangeFlags,
        _In_reads_opt_(NumRanges) const UINT* pTilePoolStartOffsets,
        _In_reads_opt_(NumRanges) const UINT* pRangeTileCounts,
        ImmediateContext::TILE_MAPPING_FLAG Flags);
    void TRANSLATION_API CopyTileMappings(Resource* pDstTiledResource, _In_ const D3D12_TILED_RESOURCE_COORDINATE* pDstStartCoords, Resource* pSrcTiledResource, _In_ const D3D12_TILED_RESOURCE_COORDINATE* pSrcStartCoords, _In_ const D3D12_TILE_REGION_SIZE* pTileRegion, ImmediateContext::TILE_MAPPING_FLAG Flags);
    void TRANSLATION_API CopyTiles(Resource* pResource, _In_ const D3D12_TILED_RESOURCE_COORDINATE* pStartCoords, _In_ const D3D12_TILE_REGION_SIZE* pTileRegion, Resource* pBuffer, UINT64 BufferOffset, ImmediateContext::TILE_COPY_FLAG Flags);
    void TRANSLATION_API UpdateTiles(Resource* pResource, _In_ const D3D12_TILED_RESOURCE_COORDINATE* pCoord, _In_ const D3D12_TILE_REGION_SIZE* pRegion, const _In_ VOID* pData, UINT Flags);
    void TRANSLATION_API TiledResourceBarrier(Resource* pBefore, Resource* pAfter);
    void TRANSLATION_API ResizeTilePool(Resource* pResource, UINT64 NewSize);

    void TRANSLATION_API RotateResourceIdentities(Resource* const* ppResources, UINT Resources);

    void TRANSLATION_API SetHardwareProtection(Resource* pResource, UINT value);
    void TRANSLATION_API SetHardwareProtectionState(BOOL state);

    void TRANSLATION_API ClearState();

    void TRANSLATION_API SetMarker(const wchar_t* name);
    void TRANSLATION_API BeginEvent(const wchar_t* name);
    void TRANSLATION_API EndEvent();

    void TRANSLATION_API BatchExtension(BatchedExtension* pExt, const void* pData, size_t DataSize);

    // Typed convenience overload: forwards &Data / sizeof(Data) to the raw overload.
    // The static_asserts state the requirements on the payload type.
    template void BatchExtension(BatchedExtension* pExt, T const& Data)
    {
        static_assert(std::is_trivially_destructible::value, "Destructors don't get called on batched commands.");
        static_assert(std::is_trivially_copyable::value, "Extensions must be trivially copyable.");
        BatchExtension(pExt, &Data, sizeof(Data));
    }

    // Constructs a TExt payload in place inside the current batch, directly after a
    // CmdExtension header. Throws std::bad_alloc when the batch cannot reserve the
    // contiguous space. Asserts that the caller is not the batch thread.
    template void EmplaceBatchExtension(BatchedExtension* pExt, Args&&... args)
    {
        assert(!IsBatchThread());
        auto Lock = m_RecordingLock.TakeLock();

        static_assert(std::is_trivially_destructible::value, "Destructors don't get called on batched commands.");
        // NOTE(review): args are forwarded here and again to the TExt constructor
        // below - safe only if GetExtensionSize does not consume rvalues; confirm.
        const size_t ExtensionSize = TExt::GetExtensionSize(std::forward(args)...);
        const size_t CommandSize = CmdExtension::GetCommandSize(pExt, nullptr, ExtensionSize);

        // Sizes are converted from bytes to BatchPrimitive units for the storage calls.
        if (!m_CurrentBatch.reserve_contiguous(CommandSize / sizeof(BatchPrimitive)))
        {
            throw std::bad_alloc();
        }

        void* pPtr = m_CurrentBatch.append_contiguous_manually(CommandSize / sizeof(BatchPrimitive));
        auto pExtensionCmd = new (pPtr) CmdExtension(pExt, nullptr, ExtensionSize);
        // The payload is placed at the aligned address just past the header.
        new (AlignPtr(pExtensionCmd + 1)) TExt(std::forward(args)...);

        ++m_CurrentCommandCount;
        SubmitBatchIfIdle();
    }

    using DispatcherFunction = void(*)(ImmediateContext&, const void*&);

    void ProcessBatchImpl(Batch* pBatchToProcess);

private:
    ImmediateContext& m_ImmCtx;
    const DispatcherFunction* const m_DispatchTable;
    const CreationArgs m_CreationArgs;

    template void AddToBatch(BatchStorage& CurrentBatch, TCmd const& command);

    // Records the command into m_CurrentBatch under the recording lock, bumps the
    // command count and gives the batch a chance to be submitted.
    template void AddToBatch(TCmd const& command)
    {
        auto Lock = m_RecordingLock.TakeLock();
        AddToBatch(m_CurrentBatch, command);
        ++m_CurrentCommandCount;
        SubmitBatchIfIdle();
    }
    template void AddToBatchVariableSize(TCmd const& command, UINT NumEntries, TEntry const* entries);
    template void EmplaceInBatch(Args&&... args);

    void ProcessBatchWork(BatchStorage& batch);
    bool WaitForBatchThreadIdle();
    bool IsBatchThreadIdle();
    bool WaitForSingleBatch(DWORD timeout);
    bool IsBatchThread();

    void BatchThread();

    template bool SyncWithBatch(uint64_t& BatchID, bool DoNotFlush, TFunc&& GetImmObjectFenceValues);

    std::unique_ptr GetIdleBatch();

private: // Referenced by recording and batch threads
    SafeHANDLE m_BatchThread;
    SafeHANDLE m_BatchSubmittedSemaphore; // Signaled by recording thread to indicate new work available.
    SafeHANDLE m_BatchConsumedSemaphore; // Signaled by batch thread to indicate it's completed work, waited on by main thread when work submitted.
    OptLock<> m_SubmissionLock{ m_CreationArgs.SubmitBatchesToWorkerThread }; // Synchronizes the deques and free page list.
    std::deque> m_QueuedBatches;
    std::deque> m_FreeBatches;
    // Note: Must be declared before BatchStorageAllocator
    FreePageContainer m_FreePages{ m_CreationArgs.SubmitBatchesToWorkerThread };
    uint64_t m_CompletedBatchID = 0;
    const Callbacks m_Callbacks;
    std::atomic m_bFlushPendingCallback;

private: // Referenced by recording thread
    CBoundState m_UAVs;
    UINT m_NumScissors = 0;
    D3D12_RECT m_Scissors[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE] = {};
    UINT m_NumViewports = 0;
    D3D12_VIEWPORT m_Viewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE] = {};

    void ClearStateImpl();

    // Page source for batch storage: the parent context's free pages when a parent
    // exists; otherwise this context's own free pages, but only when batches are
    // submitted to a worker thread; otherwise no container at all.
    const BatchStorageAllocator m_BatchStorageAllocator
    {
        m_CreationArgs.pParentContext ?
            &m_CreationArgs.pParentContext->m_FreePages :
            (m_CreationArgs.SubmitBatchesToWorkerThread ? &m_FreePages : nullptr)
    };

    static constexpr UINT c_CommandKickoffMinThreshold = 10; // Arbitrary for now

    OptLock m_RecordingLock{ m_CreationArgs.CreatesAndDestroysAreMultithreaded };
    UINT m_CurrentCommandCount = 0;
    UINT m_NumOutstandingBatches = 0;
    uint64_t m_RecordingBatchID = 1;
    BatchStorage m_CurrentBatch{ m_BatchStorageAllocator };

private: // Written by non-recording application threads, read by recording thread
    std::vector> m_PostBatchFunctions;
    uint64_t m_PendingDestructionMemorySize = 0;
};

// Deleter that routes destruction through BatchedContext::DeleteObject, i.e. the
// actual delete is queued as a post-batch function.
struct BatchedDeleter
{
    BatchedContext& Context;
    template void operator()(TPtr* pObject)
    {
        Context.DeleteObject(pObject);
    }
};

template using unique_batched_ptr = std::unique_ptr;

// Base for objects tied to a BatchedContext; ProcessBatch() is defined out of line.
class BatchedDeviceChild
{
public:
    BatchedDeviceChild(BatchedContext& Parent) noexcept
        : m_Parent(Parent)
    {
    }
    void ProcessBatch();

    BatchedContext& m_Parent;
};

// Owns a TImmediate created against the parent's immediate context (obtained
// without flushing) and destroyed through BatchedDeleter.
template class BatchedDeviceChildImpl : public BatchedDeviceChild
{
public:
    template BatchedDeviceChildImpl(BatchedContext& Parent, Args&&... args)
        : BatchedDeviceChild(Parent)
        , m_pImmediate(new TImmediate(&Parent.GetImmediateContextNoFlush(), std::forward(args)...),
                       BatchedDeleter{ Parent })
    {
    }

    // Calls ProcessBatch() before exposing the immediate object.
    TImmediate& FlushBatchAndGetImmediate()
    {
        ProcessBatch();
        return *m_pImmediate;
    }
    TImmediate& GetImmediateNoFlush()
    {
        return *m_pImmediate;
    }

protected:
    unique_batched_ptr m_pImmediate;
};

}

================================================
FILE: include/BatchedQuery.hpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once

namespace D3D12TranslationLayer
{

// Batched wrapper around an Async object. The raw pointer is always kept in
// m_pImmediateAsyncWeak; m_spImmediateAsync additionally owns it only when
// ownsAsync is true.
class BatchedQuery : public BatchedDeviceChild
{
    unique_batched_ptr m_spImmediateAsync;
    Async* m_pImmediateAsyncWeak;

public:
    BatchedQuery(BatchedContext& Context, Async* pAsync, bool ownsAsync)
        : BatchedDeviceChild(Context)
        , m_spImmediateAsync(ownsAsync ?
pAsync : nullptr, { Context })
        , m_pImmediateAsyncWeak(pAsync)
    {
    }

    // Returns the Async pointer without processing the current batch.
    Async* GetImmediateNoFlush() { return m_pImmediateAsyncWeak; }
    // Calls ProcessBatch() first, then returns the Async pointer.
    Async* FlushBatchAndGetImmediate() { ProcessBatch(); return m_pImmediateAsyncWeak; }

    Async::AsyncState m_CurrentState = Async::AsyncState::Ended;
    uint64_t m_BatchReferenceID = 0;
};

}

================================================
FILE: include/BatchedResource.hpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once

namespace D3D12TranslationLayer
{

// Batched wrapper around a Resource. m_pResource always points at the underlying
// resource; m_spResource additionally owns it only when ownsResource is true, and
// then hands it to BatchedContext::ReleaseResource on destruction (see
// BatchedDeleter below).
class BatchedResource : public BatchedDeviceChild
{
public:
    // The initializers rely on member declaration order: m_pResource is declared
    // (and therefore initialized) before m_LastRenamedResource, which reads it.
    BatchedResource(BatchedContext& Context, Resource* underlyingResource, bool ownsResource)
        : BatchedDeviceChild(Context)
        , m_spResource(ownsResource ? underlyingResource : nullptr, { Context })
        , m_pResource(underlyingResource)
        // Resources constructed without ownership don't support Map(NO_OVERWRITE) until they've been discarded once.
        , m_LastRenamedResource(ownsResource ? m_pResource->GetIdentity()->m_suballocation : decltype(m_LastRenamedResource){})
    {
        // For simplicity we'll let the immediate resource be constructed before the batched, but it should've been constructed properly.
        assert(m_pResource->m_isValid);
    }

    // Class-local deleter (distinct from the namespace-scope BatchedDeleter):
    // releases through the batched context instead of deleting.
    struct BatchedDeleter
    {
        BatchedContext& Context;
        void operator()(Resource* p) { Context.ReleaseResource(p); }
    };
    std::unique_ptr const m_spResource;
    Resource* const m_pResource;

    // Used to implement Map(NO_OVERWRITE)
    D3D12ResourceSuballocation m_LastRenamedResource;

    // Currently, the batched context only supports D3D11 dynamic resources, which are single-subresource and write-only.
    Resource::DynamicTexturePlaneData m_DynamicTexturePlaneData;
    SafeRenameResourceCookie m_PendingRenameViaCopyCookie;
};

}

================================================
FILE: include/BlitHelper.hpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once namespace D3D12TranslationLayer { class ImmediateContext; class Resource; /* Blit parameters packed into bitfields (8 + 8 + 4 + 1 + 1 + 10 = 32 bits) and aliased with the single UINT m_Data; the static_assert that follows checks that the union is exactly sizeof(m_Data). */ union BlitHelperKeyUnion { struct Bits { UINT SrcFormat : 8; UINT DstFormat : 8; UINT DstSampleCount : 4; UINT bEnableAlpha : 1; UINT bSwapRB : 1; UINT Unused : 10; } m_Bits; UINT m_Data; }; static_assert(sizeof(BlitHelperKeyUnion) == sizeof(BlitHelperKeyUnion::m_Data)); /* Declares two Blit() overloads - one taking arrays of subresource indices with counts for source and destination, one taking a single subresource index per side - plus the protected PrepareShaders() helper and the m_spBlitPSOs / m_spRootSig members. Implementations live outside this header. */ class BlitHelper { public: BlitHelper(ImmediateContext *pContext); void Blit(Resource *pSrc, UINT *pSrcSubresourceIndices, UINT numSrcSubresources, const RECT& srcRect, Resource *pDst, UINT *pDstSubresourceIndices, UINT numDstSubresources, const RECT& dstRect, bool bEnableAlpha = false, bool bSwapRBChannels = false); void Blit(Resource* pSrc, UINT SrcSubresourceIdx, const RECT& srcRect, Resource* pDst, UINT DstSubresourceIdx, const RECT& dstRect, bool bEnableAlpha = false, bool bSwapRBChannels = false); protected: using BlitPipelineState = DeviceChildImpl; BlitPipelineState* PrepareShaders(Resource *pSrc, UINT srcPlanes, Resource *pDst, UINT dstPlanes, bool bEnableAlpha, bool bSwapRB, int &outSrcPixelScalingFactor); ImmediateContext* const m_pParent; std::unordered_map> m_spBlitPSOs; std::unique_ptr m_spRootSig; private: //@param ppResource: will be updated to point at the resolved resource //@param pSubresourceIndices: will be updated to reflect the resolved resource's subresource indices //@param numSubresources: presumably the number of entries in pSubresourceIndices (TODO confirm against the implementation) void ResolveToNonMsaa( _Inout_ Resource **ppResource, _Inout_ UINT* pSubresourceIndices, UINT numSubresources ); }; }; ================================================ FILE: include/BlitHelperShaders.h ================================================ #if 0 // // Generated by Microsoft (R) HLSL Shader Compiler 10.1 // // // Buffer Definitions: // // cbuffer srcInfo // { // // int g_srcLeft; // Offset: 0 Size: 4 // int g_srcRight; // Offset: 4 Size: 4 // int g_srcTop; // Offset: 8 Size: 4 // int g_srcBottom; // Offset: 12 Size: 4 // int g_srcWidth; // Offset: 16 Size: 4 // int g_srcHeight; // Offset: 20 Size: 4 // // } // // // 
Resource Bindings: // // Name Type Format Dim ID HLSL Bind Count // ------------------------------ ---------- ------- ----------- ------- -------------- ------ // srcInfo cbuffer NA NA CB0 cb1 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_VertexID 0 x 0 VERTID uint x // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Position 0 xyzw 0 POS float xyzw // TEXCOORD 0 xy 1 NONE float xy // TEXCOORD 1 xy 2 NONE int xy // vs_5_1 dcl_globalFlags refactoringAllowed dcl_constantbuffer CB0[1:1][2], immediateIndexed, space=0 dcl_input_sgv v0.x, vertex_id dcl_output_siv o0.xyzw, position dcl_output o1.xy dcl_output o2.xy dcl_temps 2 switch v0.x case l(0) mov r0.xyzw, l(-1.000000,1.000000,0.500000,1.000000) mov r1.xy, CB0[1][0].xzxx break case l(1) mov r0.xyzw, l(1.000000,1.000000,0.500000,1.000000) mov r1.xy, CB0[1][0].yzyy break case l(2) mov r0.xyzw, l(-1.000000,-1.000000,0.500000,1.000000) mov r1.xy, CB0[1][0].xwxx break case l(3) mov r0.xyzw, l(1.000000,-1.000000,0.500000,1.000000) mov r1.xy, CB0[1][0].ywyy break default break endswitch mov o0.xyzw, r0.xyzw itof r0.xy, r1.xyxx itof r0.zw, CB0[1][1].xxxy div o1.xy, r0.xyxx, r0.zwzz mov o2.xy, r1.xyxx ret // Approximately 26 instruction slots used #endif const BYTE g_VSMain[] = { 68, 88, 66, 67, 152, 120, 215, 76, 189, 120, 214, 2, 11, 73, 209, 128, 254, 21, 137, 42, 1, 0, 0, 0, 104, 6, 0, 0, 6, 0, 0, 0, 56, 0, 0, 0, 72, 2, 0, 0, 124, 2, 0, 0, 236, 2, 0, 0, 28, 5, 0, 0, 184, 5, 0, 0, 82, 68, 69, 70, 8, 2, 0, 0, 1, 0, 0, 0, 108, 0, 0, 0, 1, 0, 0, 0, 60, 0, 0, 0, 1, 5, 254, 255, 0, 5, 0, 0, 223, 1, 0, 0, 19, 19, 68, 37, 60, 0, 0, 0, 24, 0, 0, 0, 40, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 115, 114, 99, 73, 110, 102, 111, 0, 100, 0, 0, 0, 6, 0, 0, 0, 132, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 116, 1, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 132, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 168, 1, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 132, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 179, 1, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 132, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 188, 1, 0, 0, 12, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 132, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 200, 1, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 132, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 211, 1, 0, 0, 20, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 132, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 103, 95, 115, 114, 99, 76, 101, 102, 116, 0, 105, 110, 116, 0, 171, 171, 0, 0, 2, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 126, 1, 0, 0, 103, 95, 115, 114, 99, 82, 105, 103, 104, 116, 0, 103, 95, 115, 114, 99, 84, 111, 112, 0, 103, 95, 115, 114, 99, 66, 111, 116, 116, 111, 109, 0, 103, 95, 115, 114, 99, 87, 105, 100, 116, 104, 0, 103, 95, 115, 114, 99, 72, 101, 105, 103, 104, 116, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 171, 73, 83, 71, 78, 44, 0, 0, 0, 1, 0, 0, 0, 8, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 83, 86, 95, 86, 101, 114, 116, 101, 120, 73, 68, 0, 79, 83, 71, 78, 104, 0, 0, 0, 3, 0, 0, 0, 8, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 3, 12, 0, 0, 92, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 
0, 2, 0, 0, 0, 2, 0, 0, 0, 3, 12, 0, 0, 83, 86, 95, 80, 111, 115, 105, 116, 105, 111, 110, 0, 84, 69, 88, 67, 79, 79, 82, 68, 0, 171, 171, 171, 83, 72, 69, 88, 40, 2, 0, 0, 81, 0, 1, 0, 138, 0, 0, 0, 106, 8, 0, 1, 89, 0, 0, 7, 70, 142, 48, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 4, 18, 16, 16, 0, 0, 0, 0, 0, 6, 0, 0, 0, 103, 0, 0, 4, 242, 32, 16, 0, 0, 0, 0, 0, 1, 0, 0, 0, 101, 0, 0, 3, 50, 32, 16, 0, 1, 0, 0, 0, 101, 0, 0, 3, 50, 32, 16, 0, 2, 0, 0, 0, 104, 0, 0, 2, 2, 0, 0, 0, 76, 0, 0, 3, 10, 16, 16, 0, 0, 0, 0, 0, 6, 0, 0, 3, 1, 64, 0, 0, 0, 0, 0, 0, 54, 0, 0, 8, 242, 0, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 191, 0, 0, 128, 63, 0, 0, 0, 63, 0, 0, 128, 63, 54, 0, 0, 7, 50, 0, 16, 0, 1, 0, 0, 0, 134, 128, 48, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 6, 0, 0, 3, 1, 64, 0, 0, 1, 0, 0, 0, 54, 0, 0, 8, 242, 0, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 63, 0, 0, 128, 63, 0, 0, 0, 63, 0, 0, 128, 63, 54, 0, 0, 7, 50, 0, 16, 0, 1, 0, 0, 0, 150, 133, 48, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 6, 0, 0, 3, 1, 64, 0, 0, 2, 0, 0, 0, 54, 0, 0, 8, 242, 0, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 191, 0, 0, 128, 191, 0, 0, 0, 63, 0, 0, 128, 63, 54, 0, 0, 7, 50, 0, 16, 0, 1, 0, 0, 0, 198, 128, 48, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 6, 0, 0, 3, 1, 64, 0, 0, 3, 0, 0, 0, 54, 0, 0, 8, 242, 0, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 63, 0, 0, 128, 191, 0, 0, 0, 63, 0, 0, 128, 63, 54, 0, 0, 7, 50, 0, 16, 0, 1, 0, 0, 0, 214, 133, 48, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 10, 0, 0, 1, 2, 0, 0, 1, 23, 0, 0, 1, 54, 0, 0, 5, 242, 32, 16, 0, 0, 0, 0, 0, 70, 14, 16, 0, 0, 0, 0, 0, 43, 0, 0, 5, 50, 0, 16, 0, 0, 0, 0, 0, 70, 0, 16, 0, 1, 0, 0, 0, 43, 0, 0, 7, 194, 0, 16, 0, 0, 0, 0, 0, 6, 132, 48, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 14, 0, 0, 7, 50, 32, 16, 0, 1, 0, 0, 0, 70, 0, 16, 0, 0, 0, 0, 0, 230, 10, 16, 0, 0, 0, 0, 0, 54, 0, 0, 5, 50, 32, 16, 0, 2, 0, 0, 0, 70, 0, 16, 0, 1, 0, 0, 0, 62, 0, 
0, 1, 83, 84, 65, 84, 148, 0, 0, 0, 26, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 84, 83, 48, 168, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 24, 0, 0, 0, 1, 0, 0, 0, 116, 0, 0, 0, 28, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 60, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 92, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 104, 0, 0, 0, 1, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 1, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 20, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 255, 255, 127, 127, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0 }; #if 0 // // Generated by Microsoft (R) HLSL Shader Compiler 10.1 // // // Resource Bindings: // // Name Type Format Dim ID HLSL Bind Count // ------------------------------ ---------- ------- ----------- ------- -------------- ------ // g_linearSampler sampler NA NA S0 s0 1 // inputTexture texture float4 2d T0 t0 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Position 0 xyzw 0 POS float // TEXCOORD 0 xy 1 NONE float xy // TEXCOORD 1 xy 2 NONE int // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Target 0 xyzw 0 TARGET float xyzw // ps_5_1 dcl_globalFlags refactoringAllowed dcl_sampler S0[0:0], mode_default, space=0 dcl_resource_texture2d (float,float,float,float) T0[0:0], space=0 dcl_input_ps linear v1.xy dcl_output o0.xyzw 
sample o0.xyzw, v1.xyxx, T0[0].xyzw, S0[0] ret // Approximately 2 instruction slots used #endif const BYTE g_PSBasic[] = { 68, 88, 66, 67, 34, 204, 146, 105, 207, 73, 99, 89, 2, 194, 102, 190, 175, 6, 238, 4, 1, 0, 0, 0, 148, 3, 0, 0, 6, 0, 0, 0, 56, 0, 0, 0, 20, 1, 0, 0, 132, 1, 0, 0, 184, 1, 0, 0, 72, 2, 0, 0, 228, 2, 0, 0, 82, 68, 69, 70, 212, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 60, 0, 0, 0, 1, 5, 255, 255, 0, 5, 0, 0, 169, 0, 0, 0, 19, 19, 68, 37, 60, 0, 0, 0, 24, 0, 0, 0, 40, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 156, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 1, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 103, 95, 108, 105, 110, 101, 97, 114, 83, 97, 109, 112, 108, 101, 114, 0, 105, 110, 112, 117, 116, 84, 101, 120, 116, 117, 114, 101, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 171, 171, 171, 73, 83, 71, 78, 104, 0, 0, 0, 3, 0, 0, 0, 8, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 3, 3, 0, 0, 92, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 83, 86, 95, 80, 111, 115, 105, 116, 105, 111, 110, 0, 84, 69, 88, 67, 79, 79, 82, 68, 0, 171, 171, 171, 79, 83, 71, 78, 44, 0, 0, 0, 1, 0, 0, 0, 8, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 83, 86, 95, 84, 97, 114, 103, 101, 116, 0, 171, 171, 83, 72, 69, 88, 136, 0, 0, 0, 81, 0, 0, 0, 34, 0, 0, 0, 106, 8, 0, 1, 90, 0, 0, 6, 70, 110, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 24, 0, 7, 70, 126, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 85, 0, 0, 0, 0, 0, 0, 98, 16, 0, 3, 50, 16, 16, 0, 1, 0, 0, 0, 101, 0, 0, 3, 242, 32, 
16, 0, 0, 0, 0, 0, 69, 0, 0, 11, 242, 32, 16, 0, 0, 0, 0, 0, 70, 16, 16, 0, 1, 0, 0, 0, 70, 126, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 84, 83, 48, 168, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 24, 0, 0, 0, 1, 0, 0, 0, 116, 0, 0, 0, 28, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 60, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 92, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 104, 0, 0, 0, 1, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 1, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 20, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 255, 255, 127, 127, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0 }; #if 0 // // Generated by Microsoft (R) HLSL Shader Compiler 10.1 // // // Resource Bindings: // // Name Type Format Dim ID HLSL Bind Count // ------------------------------ ---------- ------- ----------- ------- -------------- ------ // g_linearSampler sampler NA NA S0 s0 1 // inputTexture texture float4 2d T0 t0 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Position 0 xyzw 0 POS float // TEXCOORD 0 xy 1 NONE float xy // TEXCOORD 1 xy 2 NONE int // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Target 0 xyzw 0 TARGET float xyzw // ps_5_1 dcl_globalFlags 
refactoringAllowed dcl_sampler S0[0:0], mode_default, space=0 dcl_resource_texture2d (float,float,float,float) T0[0:0], space=0 dcl_input_ps linear v1.xy dcl_output o0.xyzw dcl_temps 1 sample r0.xyzw, v1.xyxx, T0[0].xyzw, S0[0] mov o0.xyzw, r0.zyxw ret // Approximately 3 instruction slots used #endif const BYTE g_PSBasic_SwapRB[] = { 68, 88, 66, 67, 254, 40, 15, 125, 11, 181, 92, 5, 103, 24, 214, 231, 7, 248, 196, 241, 1, 0, 0, 0, 176, 3, 0, 0, 6, 0, 0, 0, 56, 0, 0, 0, 20, 1, 0, 0, 132, 1, 0, 0, 184, 1, 0, 0, 100, 2, 0, 0, 0, 3, 0, 0, 82, 68, 69, 70, 212, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 60, 0, 0, 0, 1, 5, 255, 255, 0, 5, 0, 0, 169, 0, 0, 0, 19, 19, 68, 37, 60, 0, 0, 0, 24, 0, 0, 0, 40, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 156, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 1, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 103, 95, 108, 105, 110, 101, 97, 114, 83, 97, 109, 112, 108, 101, 114, 0, 105, 110, 112, 117, 116, 84, 101, 120, 116, 117, 114, 101, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 171, 171, 171, 73, 83, 71, 78, 104, 0, 0, 0, 3, 0, 0, 0, 8, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 3, 3, 0, 0, 92, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 83, 86, 95, 80, 111, 115, 105, 116, 105, 111, 110, 0, 84, 69, 88, 67, 79, 79, 82, 68, 0, 171, 171, 171, 79, 83, 71, 78, 44, 0, 0, 0, 1, 0, 0, 0, 8, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 83, 86, 95, 84, 97, 114, 103, 101, 116, 0, 171, 171, 83, 72, 69, 88, 164, 0, 0, 0, 81, 0, 0, 0, 41, 0, 0, 0, 106, 8, 0, 1, 90, 0, 0, 6, 70, 110, 
48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 24, 0, 7, 70, 126, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 85, 0, 0, 0, 0, 0, 0, 98, 16, 0, 3, 50, 16, 16, 0, 1, 0, 0, 0, 101, 0, 0, 3, 242, 32, 16, 0, 0, 0, 0, 0, 104, 0, 0, 2, 1, 0, 0, 0, 69, 0, 0, 11, 242, 0, 16, 0, 0, 0, 0, 0, 70, 16, 16, 0, 1, 0, 0, 0, 70, 126, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 5, 242, 32, 16, 0, 0, 0, 0, 0, 102, 12, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 84, 83, 48, 168, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 24, 0, 0, 0, 1, 0, 0, 0, 116, 0, 0, 0, 28, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 60, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 92, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 104, 0, 0, 0, 1, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 1, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 20, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 255, 255, 127, 127, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0 }; #if 0 // // Generated by Microsoft (R) HLSL Shader Compiler 10.1 // // // Resource Bindings: // // Name Type Format Dim ID HLSL Bind Count // ------------------------------ ---------- ------- ----------- ------- -------------- ------ // g_linearSampler sampler NA NA S0 s0 1 // inputTexture texture float4 2d T0 t0 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // 
SV_Position 0 xyzw 0 POS float // TEXCOORD 0 xy 1 NONE float xy // TEXCOORD 1 xy 2 NONE int // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_TARGET 0 xyzw 0 TARGET float xyzw // ps_5_1 dcl_globalFlags refactoringAllowed dcl_sampler S0[0:0], mode_default, space=0 dcl_resource_texture2d (float,float,float,float) T0[0:0], space=0 dcl_input_ps linear v1.xy dcl_output o0.xyzw dcl_temps 2 sample r0.xyzw, v1.xyxx, T0[0].xyzw, S0[0] mad r1.xyzw, r0.zxyx, l(256.000000, 256.000000, 256.000000, 256.000000), l(-16.000000, -128.000000, -128.000000, -128.000000) mul r0.xyz, r1.xyzx, l(298.000000, 409.000000, 100.000000, 0.000000) add r0.y, r0.y, r0.x add r0.y, r0.y, l(128.000000) mul r0.y, r0.y, l(0.003906) max r0.y, r0.y, l(0.000000) mad r0.z, r1.x, l(298.000000), -r0.z mad r0.z, -r1.w, l(208.000000), r0.z add r0.z, r0.z, l(128.000000) mul r0.z, r0.z, l(0.003906) max r0.z, r0.z, l(0.000000) mad r0.x, r1.z, l(516.000000), r0.x add r0.x, r0.x, l(128.000000) mul r0.x, r0.x, l(0.003906) max r0.x, r0.x, l(0.000000) min r0.xyz, r0.xyzx, l(256.000000, 256.000000, 256.000000, 0.000000) mul o0.xyz, r0.yzxy, l(0.003906, 0.003906, 0.003906, 0.000000) mov o0.w, r0.w ret // Approximately 20 instruction slots used #endif const BYTE g_PSAYUV[] = { 68, 88, 66, 67, 94, 5, 209, 238, 19, 54, 136, 38, 214, 63, 118, 156, 142, 240, 11, 68, 1, 0, 0, 0, 240, 5, 0, 0, 6, 0, 0, 0, 56, 0, 0, 0, 20, 1, 0, 0, 132, 1, 0, 0, 184, 1, 0, 0, 164, 4, 0, 0, 64, 5, 0, 0, 82, 68, 69, 70, 212, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 60, 0, 0, 0, 1, 5, 255, 255, 0, 5, 0, 0, 169, 0, 0, 0, 19, 19, 68, 37, 60, 0, 0, 0, 24, 0, 0, 0, 40, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 156, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 1, 0, 0, 0, 12, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 103, 95, 108, 105, 110, 101, 97, 114, 83, 97, 109, 112, 108, 101, 114, 0, 105, 110, 112, 117, 116, 84, 101, 120, 116, 117, 114, 101, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 171, 171, 171, 73, 83, 71, 78, 104, 0, 0, 0, 3, 0, 0, 0, 8, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 3, 3, 0, 0, 92, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 83, 86, 95, 80, 111, 115, 105, 116, 105, 111, 110, 0, 84, 69, 88, 67, 79, 79, 82, 68, 0, 171, 171, 171, 79, 83, 71, 78, 44, 0, 0, 0, 1, 0, 0, 0, 8, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 83, 86, 95, 84, 65, 82, 71, 69, 84, 0, 171, 171, 83, 72, 69, 88, 228, 2, 0, 0, 81, 0, 0, 0, 185, 0, 0, 0, 106, 8, 0, 1, 90, 0, 0, 6, 70, 110, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 24, 0, 7, 70, 126, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 85, 0, 0, 0, 0, 0, 0, 98, 16, 0, 3, 50, 16, 16, 0, 1, 0, 0, 0, 101, 0, 0, 3, 242, 32, 16, 0, 0, 0, 0, 0, 104, 0, 0, 2, 2, 0, 0, 0, 69, 0, 0, 11, 242, 0, 16, 0, 0, 0, 0, 0, 70, 16, 16, 0, 1, 0, 0, 0, 70, 126, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 15, 242, 0, 16, 0, 1, 0, 0, 0, 38, 1, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 67, 0, 0, 128, 67, 0, 0, 128, 67, 0, 0, 128, 67, 2, 64, 0, 0, 0, 0, 128, 193, 0, 0, 0, 195, 0, 0, 0, 195, 0, 0, 0, 195, 56, 0, 0, 10, 114, 0, 16, 0, 0, 0, 0, 0, 70, 2, 16, 0, 1, 0, 0, 0, 2, 64, 0, 0, 0, 0, 149, 67, 0, 128, 204, 67, 0, 0, 200, 66, 0, 0, 0, 0, 0, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 67, 56, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 
0, 0, 1, 64, 0, 0, 0, 0, 128, 59, 52, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 0, 50, 0, 0, 10, 66, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 1, 0, 0, 0, 1, 64, 0, 0, 0, 0, 149, 67, 42, 0, 16, 128, 65, 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 10, 66, 0, 16, 0, 0, 0, 0, 0, 58, 0, 16, 128, 65, 0, 0, 0, 1, 0, 0, 0, 1, 64, 0, 0, 0, 0, 80, 67, 42, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 67, 56, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 59, 52, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 0, 50, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 1, 0, 0, 0, 1, 64, 0, 0, 0, 0, 1, 68, 10, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 7, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 67, 56, 0, 0, 7, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 59, 52, 0, 0, 7, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 0, 51, 0, 0, 10, 114, 0, 16, 0, 0, 0, 0, 0, 70, 2, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 67, 0, 0, 128, 67, 0, 0, 128, 67, 0, 0, 0, 0, 56, 0, 0, 10, 114, 32, 16, 0, 0, 0, 0, 0, 150, 4, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 59, 0, 0, 128, 59, 0, 0, 128, 59, 0, 0, 0, 0, 54, 0, 0, 5, 130, 32, 16, 0, 0, 0, 0, 0, 58, 0, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, 20, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 84, 83, 48, 168, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 24, 0, 0, 0, 1, 0, 0, 0, 116, 0, 0, 0, 28, 0, 0, 0, 0, 0, 
0, 0, 5, 0, 0, 0, 60, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 92, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 104, 0, 0, 0, 1, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 1, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 20, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 255, 255, 127, 127, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0 }; #if 0 // // Generated by Microsoft (R) HLSL Shader Compiler 10.1 // // // Resource Bindings: // // Name Type Format Dim ID HLSL Bind Count // ------------------------------ ---------- ------- ----------- ------- -------------- ------ // g_linearSampler sampler NA NA S0 s0 1 // inputTexture texture float4 2d T0 t0 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Position 0 xyzw 0 POS float // TEXCOORD 0 xy 1 NONE float xy // TEXCOORD 1 xy 2 NONE int // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_TARGET 0 xyzw 0 TARGET float xyzw // ps_5_1 dcl_globalFlags refactoringAllowed dcl_sampler S0[0:0], mode_default, space=0 dcl_resource_texture2d (float,float,float,float) T0[0:0], space=0 dcl_input_ps linear v1.xy dcl_output o0.xyzw dcl_temps 2 sample r0.xyzw, v1.xyxx, T0[0].xyzw, S0[0] mad r1.xyzw, r0.yzxz, l(256.000000, 256.000000, 256.000000, 256.000000), l(-16.000000, -128.000000, -128.000000, -128.000000) mul r0.xyz, r1.xyzx, l(298.000000, 409.000000, 100.000000, 0.000000) add r0.y, r0.y, r0.x add r0.y, r0.y, l(128.000000) mul r0.y, r0.y, l(0.003906) max r0.y, r0.y, l(0.000000) mad r0.z, r1.x, l(298.000000), -r0.z mad r0.z, -r1.w, l(208.000000), r0.z add r0.z, r0.z, l(128.000000) mul r0.z, r0.z, l(0.003906) max r0.z, r0.z, l(0.000000) mad r0.x, r1.z, l(516.000000), r0.x add r0.x, r0.x, l(128.000000) mul r0.x, 
r0.x, l(0.003906) max r0.x, r0.x, l(0.000000) min r0.xyz, r0.xyzx, l(256.000000, 256.000000, 256.000000, 0.000000) mul o0.xyz, r0.yzxy, l(0.003906, 0.003906, 0.003906, 0.000000) mov o0.w, r0.w ret // Approximately 20 instruction slots used #endif const BYTE g_PSY4XX[] = { 68, 88, 66, 67, 29, 47, 15, 78, 229, 245, 86, 120, 114, 140, 232, 191, 184, 210, 136, 126, 1, 0, 0, 0, 240, 5, 0, 0, 6, 0, 0, 0, 56, 0, 0, 0, 20, 1, 0, 0, 132, 1, 0, 0, 184, 1, 0, 0, 164, 4, 0, 0, 64, 5, 0, 0, 82, 68, 69, 70, 212, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 60, 0, 0, 0, 1, 5, 255, 255, 0, 5, 0, 0, 169, 0, 0, 0, 19, 19, 68, 37, 60, 0, 0, 0, 24, 0, 0, 0, 40, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 156, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 1, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 103, 95, 108, 105, 110, 101, 97, 114, 83, 97, 109, 112, 108, 101, 114, 0, 105, 110, 112, 117, 116, 84, 101, 120, 116, 117, 114, 101, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 171, 171, 171, 73, 83, 71, 78, 104, 0, 0, 0, 3, 0, 0, 0, 8, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 3, 3, 0, 0, 92, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 83, 86, 95, 80, 111, 115, 105, 116, 105, 111, 110, 0, 84, 69, 88, 67, 79, 79, 82, 68, 0, 171, 171, 171, 79, 83, 71, 78, 44, 0, 0, 0, 1, 0, 0, 0, 8, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 83, 86, 95, 84, 65, 82, 71, 69, 84, 0, 171, 171, 83, 72, 69, 88, 228, 2, 0, 0, 81, 0, 0, 0, 185, 0, 0, 0, 106, 8, 0, 1, 90, 0, 0, 6, 70, 110, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 
24, 0, 7, 70, 126, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 85, 0, 0, 0, 0, 0, 0, 98, 16, 0, 3, 50, 16, 16, 0, 1, 0, 0, 0, 101, 0, 0, 3, 242, 32, 16, 0, 0, 0, 0, 0, 104, 0, 0, 2, 2, 0, 0, 0, 69, 0, 0, 11, 242, 0, 16, 0, 0, 0, 0, 0, 70, 16, 16, 0, 1, 0, 0, 0, 70, 126, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 15, 242, 0, 16, 0, 1, 0, 0, 0, 150, 8, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 67, 0, 0, 128, 67, 0, 0, 128, 67, 0, 0, 128, 67, 2, 64, 0, 0, 0, 0, 128, 193, 0, 0, 0, 195, 0, 0, 0, 195, 0, 0, 0, 195, 56, 0, 0, 10, 114, 0, 16, 0, 0, 0, 0, 0, 70, 2, 16, 0, 1, 0, 0, 0, 2, 64, 0, 0, 0, 0, 149, 67, 0, 128, 204, 67, 0, 0, 200, 66, 0, 0, 0, 0, 0, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 67, 56, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 59, 52, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 0, 50, 0, 0, 10, 66, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 1, 0, 0, 0, 1, 64, 0, 0, 0, 0, 149, 67, 42, 0, 16, 128, 65, 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 10, 66, 0, 16, 0, 0, 0, 0, 0, 58, 0, 16, 128, 65, 0, 0, 0, 1, 0, 0, 0, 1, 64, 0, 0, 0, 0, 80, 67, 42, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 67, 56, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 59, 52, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 0, 50, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 1, 0, 0, 0, 1, 64, 0, 0, 0, 0, 1, 68, 10, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 7, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 67, 56, 0, 0, 7, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 59, 52, 0, 0, 7, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 0, 51, 0, 0, 
10, 114, 0, 16, 0, 0, 0, 0, 0, 70, 2, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 67, 0, 0, 128, 67, 0, 0, 128, 67, 0, 0, 0, 0, 56, 0, 0, 10, 114, 32, 16, 0, 0, 0, 0, 0, 150, 4, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 59, 0, 0, 128, 59, 0, 0, 128, 59, 0, 0, 0, 0, 54, 0, 0, 5, 130, 32, 16, 0, 0, 0, 0, 0, 58, 0, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, 20, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 84, 83, 48, 168, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 24, 0, 0, 0, 1, 0, 0, 0, 116, 0, 0, 0, 28, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 60, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 92, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 104, 0, 0, 0, 1, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 1, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 20, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 255, 255, 127, 127, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0 }; #if 0 // // Generated by Microsoft (R) HLSL Shader Compiler 10.1 // // // Resource Bindings: // // Name Type Format Dim ID HLSL Bind Count // ------------------------------ ---------- ------- ----------- ------- -------------- ------ // g_linearSampler sampler NA NA S0 s0 1 // inputTexture texture float4 2d T0 t0 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Position 0 xyzw 0 POS float // TEXCOORD 0 xy 1 NONE float xy // TEXCOORD 1 xy 2 NONE int x // // // Output signature: // // Name Index Mask 
Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_TARGET 0 xyzw 0 TARGET float xyzw // ps_5_1 dcl_globalFlags refactoringAllowed dcl_sampler S0[0:0], mode_default, space=0 dcl_resource_texture2d (float,float,float,float) T0[0:0], space=0 dcl_input_ps linear v1.xy dcl_input_ps constant v2.x dcl_output o0.xyzw dcl_temps 2 sample r0.xyzw, v1.xyxx, T0[0].xyzw, S0[0] and r1.x, v2.x, l(0x80000000) imax r1.y, v2.x, -v2.x and r1.y, r1.y, l(1) ineg r1.z, r1.y movc r1.x, r1.x, r1.z, r1.y movc r0.x, r1.x, r0.z, r0.x mad r0.x, r0.x, l(256.000000), l(-16.000000) mad r1.xyzw, r0.wywy, l(256.000000, 256.000000, 256.000000, 256.000000), l(-128.000000, -128.000000, -128.000000, -128.000000) mul r0.yzw, r1.xxyw, l(0.000000, 409.000000, 100.000000, 516.000000) mad r0.yw, r0.xxxx, l(0.000000, 298.000000, 0.000000, 298.000000), r0.yyyw add r0.yw, r0.yyyw, l(0.000000, 128.000000, 0.000000, 128.000000) mul r0.yw, r0.yyyw, l(0.000000, 0.003906, 0.000000, 0.003906) mad r0.x, r0.x, l(298.000000), -r0.z mad r0.x, -r1.z, l(208.000000), r0.x add r0.x, r0.x, l(128.000000) mul r0.x, r0.x, l(0.003906) max r0.xyw, r0.xyxw, l(0.000000, 0.000000, 0.000000, 0.000000) min r0.xyz, r0.xywx, l(256.000000, 256.000000, 256.000000, 0.000000) mul o0.xyz, r0.yxzy, l(0.003906, 0.003906, 0.003906, 0.000000) mov o0.w, l(1.000000) ret // Approximately 22 instruction slots used #endif const BYTE g_PSPackedYUV[] = { 68, 88, 66, 67, 225, 46, 14, 197, 135, 78, 161, 215, 73, 155, 68, 131, 236, 234, 94, 69, 1, 0, 0, 0, 120, 6, 0, 0, 6, 0, 0, 0, 56, 0, 0, 0, 20, 1, 0, 0, 132, 1, 0, 0, 184, 1, 0, 0, 44, 5, 0, 0, 200, 5, 0, 0, 82, 68, 69, 70, 212, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 60, 0, 0, 0, 1, 5, 255, 255, 0, 5, 0, 0, 169, 0, 0, 0, 19, 19, 68, 37, 60, 0, 0, 0, 24, 0, 0, 0, 40, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 156, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 1, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 103, 95, 108, 105, 110, 101, 97, 114, 83, 97, 109, 112, 108, 101, 114, 0, 105, 110, 112, 117, 116, 84, 101, 120, 116, 117, 114, 101, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 171, 171, 171, 73, 83, 71, 78, 104, 0, 0, 0, 3, 0, 0, 0, 8, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 3, 3, 0, 0, 92, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 3, 1, 0, 0, 83, 86, 95, 80, 111, 115, 105, 116, 105, 111, 110, 0, 84, 69, 88, 67, 79, 79, 82, 68, 0, 171, 171, 171, 79, 83, 71, 78, 44, 0, 0, 0, 1, 0, 0, 0, 8, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 83, 86, 95, 84, 65, 82, 71, 69, 84, 0, 171, 171, 83, 72, 69, 88, 108, 3, 0, 0, 81, 0, 0, 0, 219, 0, 0, 0, 106, 8, 0, 1, 90, 0, 0, 6, 70, 110, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 24, 0, 7, 70, 126, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 85, 0, 0, 0, 0, 0, 0, 98, 16, 0, 3, 50, 16, 16, 0, 1, 0, 0, 0, 98, 8, 0, 3, 18, 16, 16, 0, 2, 0, 0, 0, 101, 0, 0, 3, 242, 32, 16, 0, 0, 0, 0, 0, 104, 0, 0, 2, 2, 0, 0, 0, 69, 0, 0, 11, 242, 0, 16, 0, 0, 0, 0, 0, 70, 16, 16, 0, 1, 0, 0, 0, 70, 126, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 7, 18, 0, 16, 0, 1, 0, 0, 0, 10, 16, 16, 0, 2, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 128, 36, 0, 0, 8, 34, 0, 16, 0, 1, 0, 0, 0, 10, 16, 16, 0, 2, 0, 0, 0, 10, 16, 16, 128, 65, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 7, 34, 0, 16, 0, 1, 0, 0, 0, 26, 0, 16, 0, 1, 0, 0, 0, 1, 64, 0, 0, 1, 0, 0, 0, 40, 0, 0, 5, 66, 0, 16, 0, 1, 0, 0, 0, 26, 0, 16, 0, 1, 0, 0, 0, 55, 0, 0, 9, 18, 0, 16, 0, 1, 0, 0, 0, 10, 0, 16, 0, 1, 0, 0, 0, 42, 0, 16, 0, 1, 0, 0, 0, 26, 
0, 16, 0, 1, 0, 0, 0, 55, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 1, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 50, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 67, 1, 64, 0, 0, 0, 0, 128, 193, 50, 0, 0, 15, 242, 0, 16, 0, 1, 0, 0, 0, 118, 7, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 67, 0, 0, 128, 67, 0, 0, 128, 67, 0, 0, 128, 67, 2, 64, 0, 0, 0, 0, 0, 195, 0, 0, 0, 195, 0, 0, 0, 195, 0, 0, 0, 195, 56, 0, 0, 10, 226, 0, 16, 0, 0, 0, 0, 0, 6, 13, 16, 0, 1, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 128, 204, 67, 0, 0, 200, 66, 0, 0, 1, 68, 50, 0, 0, 12, 162, 0, 16, 0, 0, 0, 0, 0, 6, 0, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 149, 67, 0, 0, 0, 0, 0, 0, 149, 67, 86, 13, 16, 0, 0, 0, 0, 0, 0, 0, 0, 10, 162, 0, 16, 0, 0, 0, 0, 0, 86, 13, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 67, 56, 0, 0, 10, 162, 0, 16, 0, 0, 0, 0, 0, 86, 13, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 128, 59, 0, 0, 0, 0, 0, 0, 128, 59, 50, 0, 0, 10, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 149, 67, 42, 0, 16, 128, 65, 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 10, 18, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 128, 65, 0, 0, 0, 1, 0, 0, 0, 1, 64, 0, 0, 0, 0, 80, 67, 10, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 7, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 67, 56, 0, 0, 7, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 59, 52, 0, 0, 10, 178, 0, 16, 0, 0, 0, 0, 0, 70, 12, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 10, 114, 0, 16, 0, 0, 0, 0, 0, 70, 3, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 67, 0, 0, 128, 67, 0, 0, 128, 67, 0, 0, 0, 0, 56, 0, 0, 10, 114, 32, 16, 0, 0, 0, 0, 0, 22, 6, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 59, 0, 0, 128, 59, 0, 0, 128, 59, 0, 0, 0, 0, 54, 0, 0, 5, 130, 32, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 63, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 
0, 22, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 13, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 84, 83, 48, 168, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 24, 0, 0, 0, 1, 0, 0, 0, 116, 0, 0, 0, 28, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 60, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 92, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 104, 0, 0, 0, 1, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 1, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 20, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 255, 255, 127, 127, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0 }; #if 0 // // Generated by Microsoft (R) HLSL Shader Compiler 10.1 // // // Buffer Definitions: // // cbuffer pixelInfo // { // // int g_srcPixelScalingFactor; // Offset: 0 Size: 4 // // } // // // Resource Bindings: // // Name Type Format Dim ID HLSL Bind Count // ------------------------------ ---------- ------- ----------- ------- -------------- ------ // g_linearSampler sampler NA NA S0 s0 1 // inputTexture texture float4 2d T0 t0 1 // inputTexturePlane1 texture float2 2d T1 t1 1 // pixelInfo cbuffer NA NA CB0 cb2 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Position 0 xyzw 0 POS float // TEXCOORD 0 xy 1 NONE float xy // TEXCOORD 1 xy 2 NONE int // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_TARGET 0 xyzw 0 TARGET float xyzw // ps_5_1 dcl_globalFlags 
refactoringAllowed dcl_constantbuffer CB0[2:2][1], immediateIndexed, space=0 dcl_sampler S0[0:0], mode_default, space=0 dcl_resource_texture2d (float,float,float,float) T0[0:0], space=0 dcl_resource_texture2d (float,float,float,float) T1[1:1], space=0 dcl_input_ps linear v1.xy dcl_output o0.xyzw dcl_temps 2 sample r0.x, v1.xyxx, T0[0].xyzw, S0[0] sample r0.yz, v1.xyxx, T1[1].zxyw, S0[0] itof r0.w, CB0[2][0].x mul r0.x, r0.w, r0.x mul r1.xyzw, r0.wwww, r0.zyzy mad r0.x, r0.x, l(256.000000), l(-16.000000) mad r1.xyzw, r1.xyzw, l(256.000000, 256.000000, 256.000000, 256.000000), l(-128.000000, -128.000000, -128.000000, -128.000000) mul r0.yzw, r1.xxyw, l(0.000000, 409.000000, 100.000000, 516.000000) mad r0.yw, r0.xxxx, l(0.000000, 298.000000, 0.000000, 298.000000), r0.yyyw add r0.yw, r0.yyyw, l(0.000000, 128.000000, 0.000000, 128.000000) mul r0.yw, r0.yyyw, l(0.000000, 0.003906, 0.000000, 0.003906) mad r0.x, r0.x, l(298.000000), -r0.z mad r0.x, -r1.z, l(208.000000), r0.x add r0.x, r0.x, l(128.000000) mul r0.x, r0.x, l(0.003906) max r0.xyw, r0.xyxw, l(0.000000, 0.000000, 0.000000, 0.000000) min r0.xyz, r0.xywx, l(256.000000, 256.000000, 256.000000, 0.000000) mul o0.xyz, r0.yxzy, l(0.003906, 0.003906, 0.003906, 0.000000) mov o0.w, l(1.000000) ret // Approximately 20 instruction slots used #endif const BYTE g_PS2PlaneYUV[] = { 68, 88, 66, 67, 2, 91, 99, 185, 124, 91, 131, 210, 197, 172, 124, 113, 27, 193, 110, 205, 1, 0, 0, 0, 92, 7, 0, 0, 6, 0, 0, 0, 56, 0, 0, 0, 0, 2, 0, 0, 112, 2, 0, 0, 164, 2, 0, 0, 16, 6, 0, 0, 172, 6, 0, 0, 82, 68, 69, 70, 192, 1, 0, 0, 1, 0, 0, 0, 24, 1, 0, 0, 4, 0, 0, 0, 60, 0, 0, 0, 1, 5, 255, 255, 0, 5, 0, 0, 152, 1, 0, 0, 19, 19, 68, 37, 60, 0, 0, 0, 24, 0, 0, 0, 40, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 220, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 236, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 1, 0, 0, 0, 12, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 255, 255, 255, 255, 1, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 12, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 103, 95, 108, 105, 110, 101, 97, 114, 83, 97, 109, 112, 108, 101, 114, 0, 105, 110, 112, 117, 116, 84, 101, 120, 116, 117, 114, 101, 0, 105, 110, 112, 117, 116, 84, 101, 120, 116, 117, 114, 101, 80, 108, 97, 110, 101, 49, 0, 112, 105, 120, 101, 108, 73, 110, 102, 111, 0, 171, 171, 12, 1, 0, 0, 1, 0, 0, 0, 48, 1, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 1, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 116, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 103, 95, 115, 114, 99, 80, 105, 120, 101, 108, 83, 99, 97, 108, 105, 110, 103, 70, 97, 99, 116, 111, 114, 0, 105, 110, 116, 0, 0, 0, 2, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 112, 1, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 73, 83, 71, 78, 104, 0, 0, 0, 3, 0, 0, 0, 8, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 3, 3, 0, 0, 92, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 83, 86, 95, 80, 111, 115, 105, 116, 105, 111, 110, 0, 84, 69, 88, 67, 79, 79, 82, 68, 0, 171, 171, 171, 79, 83, 71, 78, 44, 0, 0, 0, 1, 0, 0, 0, 8, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 83, 86, 95, 84, 65, 82, 71, 69, 84, 0, 171, 171, 83, 72, 69, 88, 100, 3, 0, 0, 81, 0, 0, 0, 217, 0, 0, 0, 106, 8, 0, 1, 89, 0, 0, 7, 70, 142, 48, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 90, 0, 0, 6, 70, 110, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 24, 0, 7, 70, 
126, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 85, 0, 0, 0, 0, 0, 0, 88, 24, 0, 7, 70, 126, 48, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 85, 85, 0, 0, 0, 0, 0, 0, 98, 16, 0, 3, 50, 16, 16, 0, 1, 0, 0, 0, 101, 0, 0, 3, 242, 32, 16, 0, 0, 0, 0, 0, 104, 0, 0, 2, 2, 0, 0, 0, 69, 0, 0, 11, 18, 0, 16, 0, 0, 0, 0, 0, 70, 16, 16, 0, 1, 0, 0, 0, 70, 126, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69, 0, 0, 11, 98, 0, 16, 0, 0, 0, 0, 0, 70, 16, 16, 0, 1, 0, 0, 0, 38, 125, 32, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 96, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43, 0, 0, 7, 130, 0, 16, 0, 0, 0, 0, 0, 10, 128, 48, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 56, 0, 0, 7, 18, 0, 16, 0, 0, 0, 0, 0, 58, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 56, 0, 0, 7, 242, 0, 16, 0, 1, 0, 0, 0, 246, 15, 16, 0, 0, 0, 0, 0, 102, 6, 16, 0, 0, 0, 0, 0, 50, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 67, 1, 64, 0, 0, 0, 0, 128, 193, 50, 0, 0, 15, 242, 0, 16, 0, 1, 0, 0, 0, 70, 14, 16, 0, 1, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 67, 0, 0, 128, 67, 0, 0, 128, 67, 0, 0, 128, 67, 2, 64, 0, 0, 0, 0, 0, 195, 0, 0, 0, 195, 0, 0, 0, 195, 0, 0, 0, 195, 56, 0, 0, 10, 226, 0, 16, 0, 0, 0, 0, 0, 6, 13, 16, 0, 1, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 128, 204, 67, 0, 0, 200, 66, 0, 0, 1, 68, 50, 0, 0, 12, 162, 0, 16, 0, 0, 0, 0, 0, 6, 0, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 149, 67, 0, 0, 0, 0, 0, 0, 149, 67, 86, 13, 16, 0, 0, 0, 0, 0, 0, 0, 0, 10, 162, 0, 16, 0, 0, 0, 0, 0, 86, 13, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, 0, 67, 56, 0, 0, 10, 162, 0, 16, 0, 0, 0, 0, 0, 86, 13, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 128, 59, 0, 0, 0, 0, 0, 0, 128, 59, 50, 0, 0, 10, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 149, 67, 42, 0, 16, 128, 65, 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 10, 18, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 128, 65, 0, 0, 0, 1, 0, 0, 0, 1, 64, 0, 0, 0, 0, 80, 67, 10, 0, 16, 0, 0, 0, 
0, 0, 0, 0, 0, 7, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 67, 56, 0, 0, 7, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 59, 52, 0, 0, 10, 178, 0, 16, 0, 0, 0, 0, 0, 70, 12, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 10, 114, 0, 16, 0, 0, 0, 0, 0, 70, 3, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 67, 0, 0, 128, 67, 0, 0, 128, 67, 0, 0, 0, 0, 56, 0, 0, 10, 114, 32, 16, 0, 0, 0, 0, 0, 22, 6, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 59, 0, 0, 128, 59, 0, 0, 128, 59, 0, 0, 0, 0, 54, 0, 0, 5, 130, 32, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 63, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, 20, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 84, 83, 48, 168, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 24, 0, 0, 0, 1, 0, 0, 0, 116, 0, 0, 0, 28, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 60, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 92, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 104, 0, 0, 0, 1, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 1, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 20, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 255, 255, 127, 127, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0 }; #if 0 // // Generated by Microsoft (R) HLSL Shader Compiler 10.1 // // // Resource Bindings: // // Name Type Format Dim ID HLSL Bind Count // ------------------------------ ---------- ------- ----------- ------- -------------- ------ // g_linearSampler sampler NA NA S0 s0 1 // 
inputTexture texture float4 2d T0 t0 1 // inputTexturePlane1 texture float2 2d T1 t1 1 // inputTexturePlane2 texture float2 2d T2 t2 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Position 0 xyzw 0 POS float // TEXCOORD 0 xy 1 NONE float xy // TEXCOORD 1 xy 2 NONE int // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_TARGET 0 xyzw 0 TARGET float xyzw // ps_5_1 dcl_globalFlags refactoringAllowed dcl_sampler S0[0:0], mode_default, space=0 dcl_resource_texture2d (float,float,float,float) T0[0:0], space=0 dcl_resource_texture2d (float,float,float,float) T1[1:1], space=0 dcl_resource_texture2d (float,float,float,float) T2[2:2], space=0 dcl_input_ps linear v1.xy dcl_output o0.xyzw dcl_temps 2 sample r0.x, v1.xyxx, T0[0].xyzw, S0[0] sample r0.y, v1.xyxx, T1[1].yxzw, S0[0] sample r0.z, v1.xyxx, T2[2].yzxw, S0[0] mad r0.x, r0.x, l(256.000000), l(-16.000000) mad r0.y, r0.y, l(256.000000), l(-128.000000) mad r0.z, r0.z, l(256.000000), l(-128.000000) mul r0.w, r0.z, l(409.000000) mad r0.w, r0.x, l(298.000000), r0.w add r0.w, r0.w, l(128.000000) mul r0.w, r0.w, l(0.003906) max r0.w, r0.w, l(0.000000) mul r1.xy, r0.yyyy, l(100.000000, 516.000000, 0.000000, 0.000000) mad r0.y, r0.x, l(298.000000), -r1.x mad r0.y, -r0.z, l(208.000000), r0.y add r0.y, r0.y, l(128.000000) mul r0.y, r0.y, l(0.003906) max r0.y, r0.y, l(0.000000) mad r0.x, r0.x, l(298.000000), r1.y add r0.x, r0.x, l(128.000000) mul r0.x, r0.x, l(0.003906) max r0.x, r0.x, l(0.000000) min r0.xyw, r0.xyxw, l(256.000000, 256.000000, 0.000000, 256.000000) mul o0.xyz, r0.wyxw, l(0.003906, 0.003906, 0.003906, 0.000000) mov o0.w, l(1.000000) ret // Approximately 25 instruction slots used #endif const BYTE g_PS3PlaneYUV[] = { 68, 88, 66, 67, 134, 45, 34, 232, 89, 231, 169, 84, 63, 130, 53, 88, 16, 45, 42, 16, 1, 0, 
0, 0, 72, 7, 0, 0, 6, 0, 0, 0, 56, 0, 0, 0, 136, 1, 0, 0, 248, 1, 0, 0, 44, 2, 0, 0, 252, 5, 0, 0, 152, 6, 0, 0, 82, 68, 69, 70, 72, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 60, 0, 0, 0, 1, 5, 255, 255, 0, 5, 0, 0, 31, 1, 0, 0, 19, 19, 68, 37, 60, 0, 0, 0, 24, 0, 0, 0, 40, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 220, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 236, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 1, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 255, 255, 255, 255, 1, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 12, 1, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 4, 0, 0, 0, 255, 255, 255, 255, 2, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 103, 95, 108, 105, 110, 101, 97, 114, 83, 97, 109, 112, 108, 101, 114, 0, 105, 110, 112, 117, 116, 84, 101, 120, 116, 117, 114, 101, 0, 105, 110, 112, 117, 116, 84, 101, 120, 116, 117, 114, 101, 80, 108, 97, 110, 101, 49, 0, 105, 110, 112, 117, 116, 84, 101, 120, 116, 117, 114, 101, 80, 108, 97, 110, 101, 50, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 171, 73, 83, 71, 78, 104, 0, 0, 0, 3, 0, 0, 0, 8, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 3, 3, 0, 0, 92, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 83, 86, 95, 80, 111, 115, 105, 116, 105, 111, 110, 0, 84, 69, 88, 67, 79, 79, 82, 68, 0, 171, 171, 171, 79, 83, 71, 78, 44, 0, 0, 0, 1, 0, 0, 0, 8, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 83, 86, 95, 84, 65, 82, 71, 69, 84, 0, 171, 171, 83, 72, 69, 88, 200, 3, 0, 0, 81, 0, 0, 0, 242, 0, 0, 0, 106, 8, 0, 1, 90, 0, 0, 6, 70, 110, 48, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 24, 0, 7, 70, 126, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85, 85, 0, 0, 0, 0, 0, 0, 88, 24, 0, 7, 70, 126, 48, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 85, 85, 0, 0, 0, 0, 0, 0, 88, 24, 0, 7, 70, 126, 48, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 85, 85, 0, 0, 0, 0, 0, 0, 98, 16, 0, 3, 50, 16, 16, 0, 1, 0, 0, 0, 101, 0, 0, 3, 242, 32, 16, 0, 0, 0, 0, 0, 104, 0, 0, 2, 2, 0, 0, 0, 69, 0, 0, 11, 18, 0, 16, 0, 0, 0, 0, 0, 70, 16, 16, 0, 1, 0, 0, 0, 70, 126, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69, 0, 0, 11, 34, 0, 16, 0, 0, 0, 0, 0, 70, 16, 16, 0, 1, 0, 0, 0, 22, 126, 32, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 96, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69, 0, 0, 11, 66, 0, 16, 0, 0, 0, 0, 0, 70, 16, 16, 0, 1, 0, 0, 0, 150, 124, 32, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 96, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 67, 1, 64, 0, 0, 0, 0, 128, 193, 50, 0, 0, 9, 34, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 67, 1, 64, 0, 0, 0, 0, 0, 195, 50, 0, 0, 9, 66, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 67, 1, 64, 0, 0, 0, 0, 0, 195, 56, 0, 0, 7, 130, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 128, 204, 67, 50, 0, 0, 9, 130, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 149, 67, 58, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 7, 130, 0, 16, 0, 0, 0, 0, 0, 58, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 67, 56, 0, 0, 7, 130, 0, 16, 0, 0, 0, 0, 0, 58, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 59, 52, 0, 0, 7, 130, 0, 16, 0, 0, 0, 0, 0, 58, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 0, 56, 0, 0, 10, 50, 0, 16, 0, 1, 0, 0, 0, 86, 5, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 200, 66, 0, 0, 1, 68, 0, 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 10, 34, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 149, 67, 10, 0, 16, 128, 65, 0, 0, 0, 1, 0, 0, 0, 50, 0, 0, 
10, 34, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 128, 65, 0, 0, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 80, 67, 26, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 67, 56, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 59, 52, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 0, 50, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 149, 67, 26, 0, 16, 0, 1, 0, 0, 0, 0, 0, 0, 7, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 67, 56, 0, 0, 7, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 59, 52, 0, 0, 7, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 0, 51, 0, 0, 10, 178, 0, 16, 0, 0, 0, 0, 0, 70, 12, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 67, 0, 0, 128, 67, 0, 0, 0, 0, 0, 0, 128, 67, 56, 0, 0, 10, 114, 32, 16, 0, 0, 0, 0, 0, 118, 12, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 128, 59, 0, 0, 128, 59, 0, 0, 128, 59, 0, 0, 0, 0, 54, 0, 0, 5, 130, 32, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 128, 63, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, 25, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 84, 83, 48, 168, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 24, 0, 0, 0, 1, 0, 0, 0, 116, 0, 0, 0, 28, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 60, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 92, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 104, 0, 0, 0, 1, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 1, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 2, 0, 0, 0, 0, 
0, 0, 0, 1, 0, 0, 0, 20, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 255, 255, 127, 127, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0 }; ================================================ FILE: include/BlockAllocators.h ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once #include //================================================================================================ // Allocator classes must implement: // _BlockType Allocate(_SizeType size) // void Deallocate(const _BlockType &block) // bool IsOwner(const _BlockType &block) const // void Reset() // // _SizeType must be initializable to 0 // // _BlockType classes must implement the following functions: // // represents an empty block // // _SizeType GetSize() const // empty blocks should return 0 // _SizeType GetOffset() const // empty blocks should return 0 //================================================================================================ namespace BlockAllocators { //================================================================================================ // Generic block class compatible with any allocator class. // Can be used directly or as a base class for custom allocators. 
// NOTE(review): the template parameter lists below are written out from the names the class
// bodies use; confirm them (including any default arguments) against the upstream header.
template<class _SizeType>
class CGenericBlock
{
    _SizeType m_offset = 0;
    _SizeType m_size = 0;

public:
    // An empty block: offset 0, size 0.
    CGenericBlock() {}

    // Members are initialized in declaration order (offset, then size).
    CGenericBlock(_SizeType offset, _SizeType size)
        : m_offset(offset)
        , m_size(size)
    {}

    // Builds a block with a new offset/size; the source block argument is not read.
    CGenericBlock(const CGenericBlock &, _SizeType newOffset, _SizeType newSize)
        : m_offset(newOffset)
        , m_size(newSize)
    {}

    // Two blocks are equal when both offset and size match.
    bool operator==(const CGenericBlock &o) const
    {
        return m_offset == o.m_offset && m_size == o.m_size;
    }

    // True when [offset, offset + size) of this block lies inside the range of 'o'.
    // A range whose end wraps around is rejected.
    bool IsSuballocatedFrom(const CGenericBlock &o) const
    {
        // 'auto' keeps the arithmetic in whatever type the addition yields.
        const auto end = m_offset + m_size;
        const auto outerEnd = o.m_offset + o.m_size;

        const bool noOverflow = end >= m_offset && outerEnd >= o.m_offset;
        return noOverflow && m_offset >= o.m_offset && end <= outerEnd;
    }

    _SizeType GetSize() const { return m_size; }
    _SizeType GetOffset() const { return m_offset; }
};

//================================================================================================
// Allocates a block with 0 size and 0 offset
//
// Template parameters
//  _BlockType - Block class type
//  _SizeType - Offset and Size types used by the block
template<class _BlockType, class _SizeType>
class CNullBlockAllocator
{
public:
    // The requested size is ignored; a default-constructed block is always returned.
    _BlockType Allocate(_SizeType /*size*/) { return {}; }

    // Only empty blocks may be handed back. Takes a const reference, as the allocator
    // contract in this header requires, so const blocks are accepted.
    void Deallocate(const _BlockType &block)
    {
        assert(block.GetSize() == 0);
        (void)block; // referenced only by the assert
    }

    // Owns exactly the blocks with zero size and zero offset.
    bool IsOwner(const _BlockType &block) const
    {
        return block.GetSize() == 0 && block.GetOffset() == 0;
    }

    // Nothing to reset: the allocator holds no state.
    void Reset() {}
};

//================================================================================================
// Allocates uniform blocks out of a stack-based pool if available and returns blocks
// back to the pool on deallocate.
//
// On allocate the outer allocator is used if the pool is empty.
//
// On deallocate the outer allocator is used if the pool is full.
//
// Template parameters
//  _BlockType - Block class type
//  _SizeType - Offset and Size types used by the block
//  _OuterAllocator - Allocator to fall-back to when the pool is unavailable
//  _MaxItems - Integer indicating the limit of number of stored blocks in the pool
//
// NOTE(review): the constructor below takes the item limit as a run-time argument (maxItems);
// the _MaxItems entry above may be stale - confirm.
// NOTE(review): in this copy of the header the text that normally follows 'template',
// 'static_cast', 'reinterpret_cast', 'std::vector' and 'std::unique_ptr' appears to be missing;
// confirm every such declaration against the upstream file before relying on it.
template class CPooledBlockAllocator
{
private:
    SIZE_T m_numFreeBlocks = 0;     // Number of blocks currently held in the pool
    SIZE_T m_freeStackSize = 0;     // presumably the capacity of the free stack - confirm in the .inl
    // Holds pointers to the free blocks
    // NOTE(review): declared as _BlockType*, so this looks like an array of blocks rather than
    // of pointers - confirm in the .inl
    _BlockType *m_pFreeBlockStack = nullptr;
    _OuterAllocator &m_outerAllocator; // Fallback allocator; held by reference, not owned
    _SizeType m_blockSize = 0;         // The one block size this pool recognizes (see IsOwner)

public:
    CPooledBlockAllocator(_OuterAllocator &outerAllocator, _SizeType blockSize, SIZE_T maxItems);
    ~CPooledBlockAllocator();

    // Required Allocator functions
    _BlockType Allocate(_SizeType size);
    void Deallocate(const _BlockType &block);
    // Ownership is decided purely by size: any block whose size equals m_blockSize is claimed.
    bool IsOwner(const _BlockType &block) const { return block.GetSize() == m_blockSize; }
    void Reset();
};

//================================================================================================
// Returns ceil(log2(value)) for a 64-bit value, or (UINT)-1 when value is 0.
// The bit-scan intrinsic yields the index of the highest set bit (the floor of the logarithm);
// one is added unless value is an exact power of two.
inline UINT Log2Ceil(UINT64 value)
{
    UINT ceil;
    unsigned char bsrResult;

    // Zero has no set bit to scan for; report the sentinel directly.
    if (value == 0)
    {
        return (UINT)-1;
    }

#if defined(_WIN64)
    bsrResult = _BitScanReverse64((ULONG *)&ceil, value);
    assert(ceil != (UINT)-1);
    assert(bsrResult != 0);
#else
    // No 64-bit scan here: scan the upper half first and fall back to the lower half.
    UINT upper = (UINT)(value >> 32);
    if (0 == upper)
    {
        UINT lower = (UINT)value;
        bsrResult = _BitScanReverse((ULONG *)&ceil, lower);
        assert(ceil != (UINT)-1);
        assert(bsrResult != 0);
    }
    else
    {
        bsrResult = _BitScanReverse((ULONG *)&ceil, upper);
        assert(ceil != (UINT)-1);
        assert(bsrResult != 0);
        ceil += 32; // The bit index was relative to the upper 32 bits
    }
#endif
    // Add 1 unless value is a power of two
    return ceil + (value & (value - 1) ? 1 : 0);
}

//================================================================================================
// 32-bit overload. When the scan reports failure (returns 0), ceil is set to (UINT)-1; for
// value == 0 the power-of-two test below adds nothing, so (UINT)-1 is returned.
inline UINT Log2Ceil(UINT32 value)
{
    UINT ceil;
    if (0 == _BitScanReverse((ULONG *)&ceil, value))
    {
        ceil = (UINT)-1;
    }
    // Add 1 unless value is a power of two
    return ceil + (value & (value - 1) ? 1 : 0);
}

#ifndef _WIN64
// SIZE_T overload, compiled only when _WIN64 is not defined; forwards to another overload
// after a cast.
inline UINT32 Log2Ceil(SIZE_T value)
{
    return Log2Ceil(static_cast(value));
}
#endif

//================================================================================================
// Backing store for PoolingStdAllocator: hands out fixed-size slots from pools and keeps the
// free slots on an intrusive singly linked list (each free slot stores the address of the next).
template class PoolingStdAllocatorData
{
    std::vector> m_pools;    // Owns every pool allocated so far
    BYTE* m_pHead = nullptr; // First free slot, or nullptr when no free slot is available

    // Allocates one more pool of PoolSizeInElements slots of sizeof(T) bytes and threads all
    // of its slots onto the free list. Must only be called while the free list is empty.
    // NOTE(review): assumes sizeof(T) is at least the size of a pointer; not checked here.
    template void AllocPool() // throw(std::bad_alloc)
    {
        constexpr size_t PoolSizeInBytes = sizeof(T) * PoolSizeInElements;
        assert(m_pHead == nullptr);

        m_pools.push_back(std::unique_ptr(new BYTE[PoolSizeInBytes])); // throw(std::bad_alloc)
        m_pHead = m_pools.back().get();

        // Turn the block into a linked list
        auto pCurPointer = reinterpret_cast(m_pHead);
        auto pNextPointer = m_pHead + sizeof(T);
        for (size_t i = 0; i < PoolSizeInElements - 1; ++i)
        {
            *pCurPointer = pNextPointer;
            pCurPointer = reinterpret_cast(pNextPointer);
            pNextPointer += sizeof(T);
        }
        // After linking, the cursor must sit exactly one past the end of the pool.
        assert(pNextPointer == m_pHead + PoolSizeInBytes);
        *pCurPointer = nullptr; // The last slot terminates the list
    }

public:
    // Returns storage for 'size' objects of type T. Outside the iterator-debugging path only
    // size == 1 is supported (asserted) and the slot is popped from the free list.
    template T* allocate(size_t size) // throw(std::bad_alloc)
    {
#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL != 0
        // With iterator debugging enabled the pool is bypassed entirely.
        return reinterpret_cast(::operator new(sizeof(T) * size));
#else
        UNREFERENCED_PARAMETER(size);
        assert(size == 1);
        if (m_pHead == nullptr)
        {
            AllocPool(); // throw(std::bad_alloc)
        }
        // Pop the head slot; its leading bytes hold the address of the next free slot.
        T* pRet = reinterpret_cast(m_pHead);
        m_pHead = *reinterpret_cast(m_pHead);
        return pRet;
#endif
    }

    // Returns a slot obtained from allocate(). The count argument is ignored.
    template void deallocate(T* p, size_t)
    {
#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL != 0
        return ::operator delete(p);
#else
#if TRANSLATION_LAYER_DBG
        // Fill the freed slot with a recognizable pattern in debug builds.
        memset(p, 0xAF, sizeof(T));
#endif
        // Push the slot onto the front of the free list.
        *reinterpret_cast(p) = m_pHead;
        m_pHead = reinterpret_cast(p);
#endif
    }
};

//================================================================================================
// An STL-compatible allocator for pooling data
// Holds a non-owning pointer to the shared PoolingStdAllocatorData; two allocators compare
// equal when they point at the same data object.
template class PoolingStdAllocator
{
public:
    using UnderlyingAllocatorImpl = PoolingStdAllocatorData;
    using value_type = T;
    template struct rebind
    {
        using other = PoolingStdAllocator;
    };
    UnderlyingAllocatorImpl* m_pAllocator; // Not owned; the data object must outlive this allocator

    // Both calls simply forward to the shared data object.
    T* allocate(size_t size)
    {
        return m_pAllocator->allocate(size);
    }
    void deallocate(T* p, size_t size)
    {
        m_pAllocator->deallocate(p, size);
    }
    // The allocator travels with the container on copy assignment, move assignment and swap.
    using propagate_on_container_copy_assignment = std::true_type;
    using propagate_on_container_move_assignment = std::true_type;
    using propagate_on_container_swap = std::true_type;
    bool operator==(PoolingStdAllocator const& o) const { return o.m_pAllocator == m_pAllocator; }
    bool operator!=(PoolingStdAllocator const& o) const { return o.m_pAllocator != m_pAllocator; }

    explicit PoolingStdAllocator(UnderlyingAllocatorImpl& underlying)
        : m_pAllocator(&underlying)
    {
    }
    // Converting copy: the new allocator shares the other allocator's data object.
    template PoolingStdAllocator(PoolingStdAllocator const& other)
        : m_pAllocator(other.m_pAllocator)
    {
    }
    template PoolingStdAllocator& operator=(PoolingStdAllocator const& other)
    {
        m_pAllocator = other.m_pAllocator;
        return *this;
    }
};

//================================================================================================
// Allocates blocks from a fixed range using buddy allocation method.
// Buddy allocation allows reasonably fast allocation of arbitrary size blocks
// with minimal fragmentation and provides efficient reuse of freed ranges.
// // Template parameters // _BlockType - Block class type // _SizeType - Offset and Size types used by the block // _MinBlockSise - Smallest allocatable block size template class CBuddyAllocator { std::unique_ptr> m_pAllocatorData; using SetType = std::set<_SizeType, std::less<_SizeType>, PoolingStdAllocator<_SizeType>>; std::vector m_freeBlocks; UINT m_maxOrder = 0; _SizeType m_baseOffset = 0; _SizeType m_maxBlockSize = 0; inline _SizeType SizeToUnitSize(_SizeType size) const { return (size + (_MinBlockSize - 1)) / _MinBlockSize; } inline UINT UnitSizeToOrder(_SizeType size) const { return Log2Ceil(size); } inline _SizeType GetBuddyOffset(const _SizeType &offset, const _SizeType &size) { return offset ^ size; } inline _SizeType OrderToUnitSize(UINT order) const { return ((_SizeType)1) << order; } inline _SizeType AllocateBlock(UINT order); // throw(std::bad_alloc) inline void DeallocateBlock(_SizeType offset, UINT order); // throw(std::bad_alloc) public: CBuddyAllocator(_SizeType maxBlockSize, _SizeType baseOffset = 0); // throw(std::bad_alloc) CBuddyAllocator() = default; // Noncopyable CBuddyAllocator(CBuddyAllocator const&) = delete; CBuddyAllocator(CBuddyAllocator&&) = default; CBuddyAllocator& operator=(CBuddyAllocator const&) = delete; CBuddyAllocator& operator=(CBuddyAllocator&&) = default; inline _BlockType Allocate(_SizeType size); inline void Deallocate(_In_ const _BlockType &block); inline bool IsOwner(_In_ const _BlockType &block) const { return block.GetOffset() >= m_baseOffset && block.GetSize() <= m_maxBlockSize; } inline void Reset() { // Clear the free blocks collection m_freeBlocks.clear(); // Initialize the pool with a free inner block of max inner block size m_freeBlocks.reserve(m_maxOrder + 1); // throw(std::bad_alloc) for (UINT i = 0; i <= m_maxOrder; ++i) { m_freeBlocks.emplace_back(PoolingStdAllocator<_SizeType>(*m_pAllocatorData)); } m_freeBlocks[m_maxOrder].insert((_SizeType)0); // throw(std::bad_alloc) } }; 
//================================================================================================
// Categorizes allocations into buckets based on conversion from size to bucket index using
// functor _SizeToBucketFunc
// NOTE(review): the template parameter lists in this header look truncated in this copy of the
// file - confirm against the upstream header.
template class CBucketizedBlockAllocator
{
    // Caller-provided array of m_numAllocators allocators, indexed by bucket. Not owned here:
    // nothing in this class creates or frees the array.
    _OuterAllocator *m_pAllocators = nullptr;
    SIZE_T m_numAllocators = 0;

public:
    CBucketizedBlockAllocator(SIZE_T numAllocators, _OuterAllocator *pAllocators);

    _BlockType Allocate(_SizeType size);
    void Deallocate(const _BlockType &block);
    bool IsOwner(const _BlockType &block) const;
    void Reset();
};

//================================================================================================
// Uses a buddy allocator to allocate offsets from a virtual resource, with an inner allocator
// which allocates disjoint resources at a size threshold.
template class CDisjointBuddyAllocator
{
private:
    typedef CBuddyAllocator<_BlockType, _SizeType, _MinBlockSize> BuddyAllocatorType;
    BuddyAllocatorType m_BuddyAllocator;
    _InnerAllocator m_InnerAllocator;

    using InnerAllocatorDecayed = typename std::decay<_InnerAllocator>::type;

public:
    typedef typename std::invoke_result::type AllocationType;

private:
    // One inner allocation per m_Threshold-sized bucket of the buddy range, shared by every
    // live block whose offset falls inside that bucket.
    struct RefcountedAllocation
    {
        UINT m_Refcount = 0;
        AllocationType m_Allocation = AllocationType{};
    };

    std::vector m_Allocations;  // Indexed by BucketFromOffset()
    _SizeType m_Threshold = 0;

    // Index into m_Allocations for a block starting at the given offset.
    inline UINT BucketFromOffset(_SizeType offset) const { return UINT(offset / m_Threshold); }

public:
    template CDisjointBuddyAllocator(_SizeType maxBlockSize, _SizeType threshold, InnerAllocatorArgs&&... innerArgs); // throw(std::bad_alloc)
    CDisjointBuddyAllocator() = default;
    CDisjointBuddyAllocator(CDisjointBuddyAllocator&&) = default;
    CDisjointBuddyAllocator& operator=(CDisjointBuddyAllocator&&) = default;

    _BlockType Allocate(_SizeType size);
    void Deallocate(const _BlockType &block);
    bool IsOwner(const _BlockType &block) const;
    void Reset();

    AllocationType GetInnerAllocation(const _BlockType &block) const;
    _SizeType GetInnerAllocationOffset(const _BlockType &block) const;
};

//================================================================================================
// On allocate uses the _BelowOrEqualAllocator if the size is <= _ThresholdValue and the
// _AboveAllocator if the size is > _ThresholdValue
// Every other operation routes on block.GetSize() with the same comparison.
template class CThresholdAllocator
{
private:
    _BelowOrEqualAllocator m_BelowAllocator;
    _AboveAllocator m_AboveAllocator;

    using BelowDecayed = typename std::decay<_BelowOrEqualAllocator>::type;
    using AboveDecayed = typename std::decay<_AboveAllocator>::type;

public:
    template CThresholdAllocator(Below&& below, Above&& above)
        : m_BelowAllocator(std::forward(below)), m_AboveAllocator(std::forward(above)) { }

    _BlockType Allocate(_SizeType size)
    {
        if (size <= _ThresholdValue)
        {
            return m_BelowAllocator.Allocate(size);
        }
        else
        {
            return m_AboveAllocator.Allocate(size);
        }
    }

    void Deallocate(const _BlockType &block)
    {
        assert(IsOwner(block));
        if (block.GetSize() <= _ThresholdValue)
        {
            m_BelowAllocator.Deallocate(block);
        }
        else
        {
            m_AboveAllocator.Deallocate(block);
        }
    }

    bool IsOwner(const _BlockType &block) const
    {
        if (block.GetSize() <= _ThresholdValue)
        {
            return m_BelowAllocator.IsOwner(block);
        }
        else
        {
            return m_AboveAllocator.IsOwner(block);
        }
    }

    void Reset()
    {
        m_BelowAllocator.Reset();
        m_AboveAllocator.Reset();
    }

    auto GetInnerAllocation(const _BlockType &block) const
    {
        assert(IsOwner(block));
        if (block.GetSize() <= _ThresholdValue)
        {
            return m_BelowAllocator.GetInnerAllocation(block);
        }
        else
        {
            return m_AboveAllocator.GetInnerAllocation(block);
        }
    }

    _SizeType GetInnerAllocationOffset(const _BlockType &block) const
    {
        assert(IsOwner(block));
        if (block.GetSize() <= _ThresholdValue)
        {
            return m_BelowAllocator.GetInnerAllocationOffset(block);
        }
        else
        {
            return m_AboveAllocator.GetInnerAllocationOffset(block);
        }
    }
};
} // namespace BlockAllocators

================================================
FILE: include/BlockAllocators.inl
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once

namespace BlockAllocators
{
//------------------------------------------------------------------------------------------------
// Creates a pool that caches up to maxItems freed blocks of blockSize before handing
// further frees back to outerAllocator.
template CPooledBlockAllocator<_BlockType, _OuterAllocator, _SizeType>::CPooledBlockAllocator(_OuterAllocator &outerAllocator, _SizeType blockSize, SIZE_T maxItems)
    : m_freeStackSize(maxItems)
    , m_numFreeBlocks(0)
    , m_outerAllocator(outerAllocator)
    , m_blockSize(blockSize)
{
    m_pFreeBlockStack = new _BlockType[maxItems]; // throw(std::bad_alloc)
}

//------------------------------------------------------------------------------------------------
// Returns every cached block to the outer allocator, then frees the cache itself.
template CPooledBlockAllocator<_BlockType, _OuterAllocator, _SizeType>::~CPooledBlockAllocator()
{
    Reset();
    delete[] m_pFreeBlockStack;
}

//------------------------------------------------------------------------------------------------
// Returns a cached block when one is available, otherwise asks the outer allocator for a
// block of m_blockSize. The requested size only decides whether the request can be served.
template _BlockType CPooledBlockAllocator<_BlockType, _OuterAllocator, _SizeType>::Allocate(_SizeType size)
{
    if (size > m_blockSize)
    {
        // Can't allocate the requested size so return an empty block
        return _BlockType();
    }

    if (m_numFreeBlocks > 0)
    {
        m_numFreeBlocks--;
        return m_pFreeBlockStack[m_numFreeBlocks];
    }
    else
    {
        return m_outerAllocator.Allocate(m_blockSize);
    }
}

//------------------------------------------------------------------------------------------------
// Caches the block while the free stack has room; otherwise releases it to the outer allocator.
template void CPooledBlockAllocator<_BlockType, _OuterAllocator, _SizeType>::Deallocate(const _BlockType &block)
{
    if (m_numFreeBlocks < m_freeStackSize)
    {
        m_pFreeBlockStack[m_numFreeBlocks] = block;
m_numFreeBlocks++;
    }
    else
    {
        m_outerAllocator.Deallocate(block);
    }
}

//------------------------------------------------------------------------------------------------
// Hands every cached block back to the outer allocator and empties the cache.
template void CPooledBlockAllocator<_BlockType, _OuterAllocator, _SizeType>::Reset()
{
    // Deallocate all the pooled objects
    for (unsigned i = 0; i < m_numFreeBlocks; ++i)
    {
        m_outerAllocator.Deallocate(m_pFreeBlockStack[i]);
    }
    m_numFreeBlocks = 0;
}

//================================================================================================
// class CBuddyAllocator
//================================================================================================

//------------------------------------------------------------------------------------------------
// Returns the unit offset of a free block of the requested order, splitting a higher-order
// block when this order's free set is empty. Throws std::bad_alloc when order exceeds
// m_maxOrder, which is also how "out of space" surfaces once the recursion runs past the top.
template _SizeType CBuddyAllocator<_BlockType, _SizeType, _MinBlockSize>::AllocateBlock(UINT order) // throw(std::bad_alloc)
{
    _SizeType offset;

    if (order > m_maxOrder)
    {
        throw(std::bad_alloc()); // Can't allocate a block that large
    }

    auto it = m_freeBlocks[order].begin();
    if (it == m_freeBlocks[order].end())
    {
        // No free nodes in the requested pool. Try to find a higher-order block and split it.
        _SizeType left = AllocateBlock(order + 1); // throw(std::bad_alloc)
        _SizeType size = OrderToUnitSize(order);
        _SizeType right = left + size;
        m_freeBlocks[order].insert(right); // Add the right block to the free pool
        offset = left; // Return the left block
    }
    else
    {
        offset = *it;

        // Remove the block from the free list
        m_freeBlocks[order].erase(it);
    }

    return offset;
}

//------------------------------------------------------------------------------------------------
// Returns a block to the free sets, merging it with its buddy (recursively, one order up)
// whenever the buddy is free as well.
template void CBuddyAllocator<_BlockType, _SizeType, _MinBlockSize>::DeallocateBlock(_SizeType offset, UINT order) // throw(std::bad_alloc)
{
    // See if the buddy block is free
    _SizeType size = OrderToUnitSize(order);
    _SizeType buddy = GetBuddyOffset(offset, size);
    auto it = m_freeBlocks[order].find(buddy);
    if (it != m_freeBlocks[order].end())
    {
        // Deallocate merged blocks
        // The recursive call only touches higher-order sets, so 'it' is still valid afterwards.
        // Because the buddy is erased after the call, a throw leaves it in this order's free set.
        DeallocateBlock(min(offset, buddy), order + 1); // throw(std::bad_alloc)

        // Remove the buddy from the free list
        m_freeBlocks[order].erase(it);
    }
    else
    {
        // Add the block to the free list
        m_freeBlocks[order].insert(offset); // throw(std::bad_alloc)
    }
}

//------------------------------------------------------------------------------------------------
// Builds an allocator over [baseOffset, baseOffset + maxBlockSize), initially one free block.
// Members are initialized in declaration order, so m_pAllocatorData is set up first even
// though it is listed last here.
template CBuddyAllocator<_BlockType, _SizeType, _MinBlockSize>::CBuddyAllocator(_SizeType maxBlockSize, _SizeType baseOffset) // throw(std::bad_alloc)
    : m_baseOffset(baseOffset)
    , m_maxBlockSize(maxBlockSize)
    , m_pAllocatorData(new typename decltype(m_pAllocatorData)::element_type) // throw(std::bad_alloc)
{
    // maxBlockSize should be evenly divisible by _MinBlockSize and
    // maxBlockSize / _MinBlockSize should be a power of two
    assert((maxBlockSize / _MinBlockSize) * _MinBlockSize == maxBlockSize); // Evenly divisible
    assert(0 == ((maxBlockSize / _MinBlockSize) & ((maxBlockSize / _MinBlockSize) - 1))); // Power of two

    m_maxOrder = UnitSizeToOrder(SizeToUnitSize(maxBlockSize));

    Reset();
}

//------------------------------------------------------------------------------------------------
// Allocates a block of at least 'size'. The returned block's size is the order's full span
// (OrderToUnitSize(order) * _MinBlockSize) and its offset is relative to m_baseOffset.
// Returns _BlockType(0, 0) instead of throwing when no block is available.
template
_BlockType CBuddyAllocator<_BlockType, _SizeType, _MinBlockSize>::Allocate(_SizeType size)
{
    _SizeType unitSize = SizeToUnitSize(size);
    UINT order = UnitSizeToOrder(unitSize);
    try
    {
        _SizeType offset = AllocateBlock(order); // throw(std::bad_alloc)
        return _BlockType(m_baseOffset + (offset * _MinBlockSize), OrderToUnitSize(order) * _MinBlockSize);
    }
    catch (std::bad_alloc&)
    {
        // There are no blocks available for the requested size so
        // return the NULL block type
        return _BlockType(0, 0);
    }
}

//------------------------------------------------------------------------------------------------
// Returns a block previously produced by Allocate. The order is recomputed from the block's
// size, so the block must carry the size Allocate reported.
template void CBuddyAllocator<_BlockType, _SizeType, _MinBlockSize>::Deallocate(_In_ const _BlockType &block)
{
    assert(IsOwner(block));

    _SizeType offset = SizeToUnitSize(block.GetOffset() - m_baseOffset);
    _SizeType size = SizeToUnitSize(block.GetSize());
    UINT order = UnitSizeToOrder(size);

    try
    {
        DeallocateBlock(offset, order); // throw(std::bad_alloc)
    }
    catch (std::bad_alloc&)
    {
        // Deallocate failed trying to add the free block to the pool
        // resulting in a leak.  Unfortunately there is not much we can do.
        // Fortunately this is expected to be extremely rare as the storage
        // needed for each deallocate is very small.
} } //================================================================================================ // class CBucketizedBlockAllocator //================================================================================================ //------------------------------------------------------------------------------------------------ template CBucketizedBlockAllocator<_OuterAllocator, _SizeToBucketFunc, _BlockType, _SizeType>::CBucketizedBlockAllocator(SIZE_T numAllocators, _OuterAllocator *pAllocators) : m_pAllocators(pAllocators) , m_numAllocators(numAllocators) { } template _BlockType CBucketizedBlockAllocator<_OuterAllocator, _SizeToBucketFunc, _BlockType, _SizeType>::Allocate(_SizeType size) { _SizeToBucketFunc SizeToBucketFunc; SIZE_T bucket = SizeToBucketFunc(size); return bucket < m_numAllocators ? m_pAllocators[bucket].Allocate(size) : _BlockType(); } template void CBucketizedBlockAllocator<_OuterAllocator, _SizeToBucketFunc, _BlockType, _SizeType>::Deallocate(const _BlockType &block) { _SizeToBucketFunc SizeToBucketFunc; SIZE_T bucket = SizeToBucketFunc(block.GetSize()); if (bucket < m_numAllocators) { m_pAllocators[bucket].Deallocate(block); } } template bool CBucketizedBlockAllocator<_OuterAllocator, _SizeToBucketFunc, _BlockType, _SizeType>::IsOwner(const _BlockType &block) const { _SizeToBucketFunc SizeToBucketFunc; SIZE_T bucket = SizeToBucketFunc(block.GetSize()); return bucket < this->m_numBuckets ? 
m_pAllocators[bucket].IsOwner(block) : false; } template void CBucketizedBlockAllocator<_OuterAllocator, _SizeToBucketFunc, _BlockType, _SizeType>::Reset() { // Reset each of the outer allocators for (SIZE_T bucket = 0; bucket < m_numAllocators; ++bucket) { m_pAllocators[bucket].Reset(); } } //================================================================================================ // class CDisjointBuddyAllocator //================================================================================================ template template CDisjointBuddyAllocator<_BlockType, _InnerAllocator, _SizeType, _MinBlockSize>::CDisjointBuddyAllocator(_SizeType maxBlockSize, _SizeType threshold, InnerAllocatorArgs&&... innerArgs) // throw(std::bad_alloc) : m_BuddyAllocator(maxBlockSize) , m_Threshold(threshold) , m_InnerAllocator(std::forward(innerArgs)...) { } template _BlockType CDisjointBuddyAllocator<_BlockType, _InnerAllocator, _SizeType, _MinBlockSize>::Allocate(_SizeType size) // throw(std::bad_alloc) { if (size > m_Threshold) { throw(std::bad_alloc()); // Can't allocate a block that large } _BlockType block = m_BuddyAllocator.Allocate(size); // throw(std::bad_alloc) if (block.GetSize() == 0) { return block; } // This is just a way of making sure the deleter gets called unless release() gets called. 
std::unique_ptr<_BlockType, std::function> blockGuard(&block, [this](_BlockType* b)
        {
            // Runs only if an exception escapes below: gives the range back to the buddy allocator.
            m_BuddyAllocator.Deallocate(*b);
        }
    );

    _SizeType offset = block.GetOffset();
    UINT bucket = BucketFromOffset(offset);

    // Grow the bucket table on demand.
    if (bucket >= m_Allocations.size())
    {
        m_Allocations.resize(bucket + 1); // throw(std::bad_alloc)
    }

    // First block in this bucket: create the inner allocation that backs the whole bucket.
    if (m_Allocations[bucket].m_Refcount == 0)
    {
        m_Allocations[bucket].m_Allocation = m_InnerAllocator.Allocate(m_Threshold); // throw(std::bad_alloc)
    }

    // No more exceptions
    m_Allocations[bucket].m_Refcount++;
    blockGuard.release();
    return block;
}

// Releases a block. The bucket's inner allocation is freed when its last block goes away,
// then the offset range is returned to the buddy allocator.
template void CDisjointBuddyAllocator<_BlockType, _InnerAllocator, _SizeType, _MinBlockSize>::Deallocate(const _BlockType &block)
{
    assert(IsOwner(block));

    _SizeType offset = block.GetOffset();
    UINT bucket = BucketFromOffset(offset);

    assert(bucket < m_Allocations.size());
    if (--m_Allocations[bucket].m_Refcount == 0)
    {
        m_InnerAllocator.Deallocate(m_Allocations[bucket].m_Allocation);
    }

    m_BuddyAllocator.Deallocate(block);
}

// Ownership is decided solely by the buddy allocator.
template bool CDisjointBuddyAllocator<_BlockType, _InnerAllocator, _SizeType, _MinBlockSize>::IsOwner(const _BlockType &block) const
{
    return m_BuddyAllocator.IsOwner(block);
}

// Frees every inner allocation that is still referenced, then resets both allocators.
template void CDisjointBuddyAllocator<_BlockType, _InnerAllocator, _SizeType, _MinBlockSize>::Reset()
{
    for (RefcountedAllocation& Allocation : m_Allocations)
    {
        if (Allocation.m_Refcount > 0)
        {
            m_InnerAllocator.Deallocate(Allocation.m_Allocation);
        }
    }
    m_Allocations.clear();
    m_BuddyAllocator.Reset();
    m_InnerAllocator.Reset();
}

// Returns the inner allocation backing the bucket that contains the block. The block must be live.
template auto CDisjointBuddyAllocator<_BlockType, _InnerAllocator, _SizeType, _MinBlockSize>::GetInnerAllocation(const _BlockType &block) const -> AllocationType
{
    assert(IsOwner(block));

    _SizeType offset = block.GetOffset();
    UINT bucket = BucketFromOffset(offset);

    assert(bucket < m_Allocations.size());
    assert(m_Allocations[bucket].m_Refcount > 0);
    return m_Allocations[bucket].m_Allocation;
}

template _SizeType CDisjointBuddyAllocator<_BlockType, _InnerAllocator, _SizeType,
_MinBlockSize>::GetInnerAllocationOffset(const _BlockType &block) const { assert(IsOwner(block)); _SizeType offset = block.GetOffset(); return offset % m_Threshold; } } // namespace BlockAllocators ================================================ FILE: include/CommandListManager.hpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once namespace D3D12TranslationLayer { LONGLONG InterlockedRead64(volatile LONGLONG* p); class ImmediateContext; class CommandListManager { public: CommandListManager(ImmediateContext *pParent, ID3D12CommandQueue *pQueue, COMMAND_LIST_TYPE type); ~CommandListManager(); void AdditionalCommandsAdded() noexcept; void DrawCommandAdded() noexcept; void DispatchCommandAdded() noexcept; void UploadHeapSpaceAllocated(UINT64 heapSize) noexcept; void ReadbackInitiated() noexcept; void SubmitCommandListIfNeeded(); void SetNeedSubmitFence() noexcept { m_bNeedSubmitFence = true; } bool HasCommands() const noexcept { return m_NumCommands > 0; } bool NeedSubmitFence() const noexcept { return m_bNeedSubmitFence; } bool ShouldFlushForResourceAcquire() const noexcept { return HasCommands() || NeedSubmitFence(); } template void ExecuteCommandQueueCommand(TFunc&& func) { m_bNeedSubmitFence = true; m_pResidencySet->Close(); m_pParent->GetResidencyManager().SubmitCommandQueueCommand( m_pCommandQueue.get(), (UINT)m_type, m_pResidencySet.get(), std::forward(func)); ResetResidencySet(); } HRESULT PreExecuteCommandQueueCommand(); //throws HRESULT PostExecuteCommandQueueCommand(); //throws void SubmitCommandList(); void InitCommandList(); void ResetCommandList(); void CloseCommandList() { CloseCommandList(nullptr); } void DiscardCommandList(); void ResetResidencySet(); void PrepForCommandQueueSync(); // Returns true if synchronization was successful, false likely means device is removed bool WaitForCompletion(); bool WaitForFenceValue(UINT64 FenceValue); UINT64 
GetCompletedFenceValue() noexcept { return m_Fence.GetCompletedValue(); } HRESULT EnqueueSetEvent(HANDLE hEvent) noexcept; UINT64 EnsureFlushedAndFenced(); HANDLE GetEvent() noexcept { return m_hWaitEvent; } void AddResourceToResidencySet(Resource *pResource); UINT64 GetCommandListID() { return m_commandListID; } UINT64 GetCommandListIDInterlockedRead() { return InterlockedRead64((volatile LONGLONG*)&m_commandListID); } _Out_range_(0, COMMAND_LIST_TYPE::MAX_VALID - 1) COMMAND_LIST_TYPE GetCommandListType() { return m_type; } ID3D12CommandQueue* GetCommandQueue() { return m_pCommandQueue.get(); } ID3D12CommandList* GetCommandList() { return m_pCommandList.get(); } ID3D12SharingContract* GetSharingContract() { return m_pSharingContract.get(); } Fence* GetFence() { return &m_Fence; } ID3D12VideoDecodeCommandList2* GetVideoDecodeCommandList(ID3D12CommandList *pCommandList = nullptr) { return m_type == COMMAND_LIST_TYPE::VIDEO_DECODE ? static_cast(pCommandList ? pCommandList : m_pCommandList.get()) : nullptr; } ID3D12VideoProcessCommandList2* GetVideoProcessCommandList(ID3D12CommandList *pCommandList = nullptr) { return m_type == COMMAND_LIST_TYPE::VIDEO_PROCESS ? static_cast(pCommandList ? pCommandList : m_pCommandList.get()) : nullptr; } ID3D12GraphicsCommandList* GetGraphicsCommandList(ID3D12CommandList *pCommandList = nullptr) { return m_type == COMMAND_LIST_TYPE::GRAPHICS ? static_cast(pCommandList ? 
pCommandList : m_pCommandList.get()) : nullptr; } bool WaitForFenceValueInternal(bool IsImmediateContextThread, UINT64 FenceValue); bool ComputeOnly() {return !!(m_pParent->FeatureLevel() == D3D_FEATURE_LEVEL_1_0_CORE);} private: void ResetCommandListTrackingData() { m_NumCommands = 0; m_NumDraws = 0; m_NumDispatches = 0; m_UploadHeapSpaceAllocated = 0; } void SubmitCommandListImpl(); ImmediateContext* const m_pParent; // weak-ref const COMMAND_LIST_TYPE m_type; unique_comptr m_pCommandList; unique_comptr m_pCommandAllocator; unique_comptr m_pCommandQueue; unique_comptr m_pSharingContract; Fence m_Fence{m_pParent, FENCE_FLAG_NONE, 0}; #if TRANSLATION_LAYER_DBG Fence m_StallFence{m_pParent, FENCE_FLAG_NONE, 0}; #endif std::unique_ptr m_pResidencySet; UINT m_NumFlushesWithNoReadback = 0; UINT m_NumCommands = 0; UINT m_NumDraws = 0; UINT m_NumDispatches = 0; UINT64 m_UploadHeapSpaceAllocated = 0; bool m_bNeedSubmitFence; ThrowingSafeHandle m_hWaitEvent; // The more upload heap space allocated in a command list, the more memory we are // potentially holding up that could have been recycled into the pool. If too // much is held up, flush the command list static constexpr UINT cMaxAllocatedUploadHeapSpacePerCommandList = 256 * 1024 * 1024; DWORD m_MaxAllocatedUploadHeapSpacePerCommandList; // Command allocator pools CBoundedFencePool< unique_comptr > m_AllocatorPool; // Some notes on threading related to this command list ID / fence value. // The fence value is and should only ever be written by the immediate context thread. // The immediate context thread may read the fence value through GetCommandListID(). // Other threads may read this value, but should only do so via CommandListIDInterlockedRead(). 
UINT64 m_commandListID = 1; // Number of maximum in-flight command lists at a given time static constexpr UINT GetMaxInFlightDepth(COMMAND_LIST_TYPE type) { switch (type) { case COMMAND_LIST_TYPE::VIDEO_DECODE: return 16; default: return 1024; } }; void SubmitFence() noexcept; void CloseCommandList(ID3D12CommandList *pCommandList); void PrepareNewCommandList(); void IncrementFence(); void UpdateLastUsedCommandListIDs(); D3D12_COMMAND_LIST_TYPE GetD3D12CommandListType(COMMAND_LIST_TYPE type); }; } // namespace D3D12TranslationLayer ================================================ FILE: include/D3D12TranslationLayerDependencyIncludes.h ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once // The Windows build uses DBG for debug builds, but Visual Studio defaults to NDEBUG for retail // We'll pick TRANSLATION_LAYER_DBG for CMake (VS) builds, and we'll convert DBG to that here // for Windows builds #if DBG #define TRANSLATION_LAYER_DBG 1 #endif //SDK Headers #define NOMINMAX #define WIN32_LEAN_AND_MEAN #define _ATL_NO_WIN_SUPPORT #include #include #include #include // This defines NTSTATUS and other types that are needed for kernel headers #include #define INITGUID #include #include #include #undef INITGUID #include #include #include #include #include //STL #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using std::min; using std::max; #ifndef assert #include #endif #include #include #include #define INITGUID #include ================================================ FILE: include/D3D12TranslationLayerIncludes.h ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#pragma once #ifdef _WIN64 #define USE_PIX_ON_ALL_ARCHITECTURES #endif #ifdef USE_PIX #include #endif //Library Headers #define TRANSLATION_API #include "VideoViewHelper.hpp" #include "SubresourceHelpers.hpp" #include "Util.hpp" #include "DeviceChild.hpp" #include #include "Allocator.h" #include "XPlatHelpers.h" #include #include #include #include #include "MaxFrameLatencyHelper.hpp" #include "Shader.hpp" #include "Sampler.hpp" #include "View.hpp" #include "PipelineState.hpp" #include "SwapChainManager.hpp" #include "ResourceBinding.hpp" #include "Fence.hpp" #include "Residency.h" #include "ResourceState.hpp" #include "RootSignature.hpp" #include "Resource.hpp" #include "Query.hpp" #include "ResourceCache.hpp" #include "BlitHelper.hpp" #include "ImmediateContext.hpp" #include "BatchedContext.hpp" #include "BatchedResource.hpp" #include "BatchedQuery.hpp" #include "CommandListManager.hpp" #include "VideoDecodeStatistics.hpp" #include "VideoReferenceDataManager.hpp" #include "VideoDecode.hpp" #include "VideoDevice.hpp" #include "VideoProcess.hpp" #include "VideoProcessEnum.hpp" #include "View.inl" #include "Sampler.inl" #include "Shader.inl" #include "ImmediateContext.inl" #include "CommandListManager.inl" #include #ifndef MICROSOFT_TELEMETRY_ASSERT #define MICROSOFT_TELEMETRY_ASSERT(x) assert(x) #endif namespace D3D12TranslationLayer { extern TraceLoggingHProvider g_hTracelogging; void SetTraceloggingProvider(TraceLoggingHProvider hTracelogging); } ================================================ FILE: include/DXGIColorSpaceHelper.h ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#pragma once // ---------------------------------------------------------------------------- // // CDXGIColorSpaceHelper // // ---------------------------------------------------------------------------- class CDXGIColorSpaceHelper { public: //---------------------------------------------------------------------------------------------------------------------------------- static bool IsRGBColorSpace(DXGI_COLOR_SPACE_TYPE ColorSpace) { switch (ColorSpace) { case DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709: case DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709: case DXGI_COLOR_SPACE_RGB_STUDIO_G22_NONE_P709: case DXGI_COLOR_SPACE_RGB_STUDIO_G22_NONE_P2020: case DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020: case DXGI_COLOR_SPACE_RGB_STUDIO_G2084_NONE_P2020: case DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P2020: case DXGI_COLOR_SPACE_RGB_STUDIO_G24_NONE_P709: case DXGI_COLOR_SPACE_RGB_STUDIO_G24_NONE_P2020: return true; } return false; } //---------------------------------------------------------------------------------------------------------------------------------- static bool IsStudioColorSpace(DXGI_COLOR_SPACE_TYPE ColorSpace) { switch (ColorSpace) { case DXGI_COLOR_SPACE_RGB_STUDIO_G22_NONE_P709: case DXGI_COLOR_SPACE_RGB_STUDIO_G22_NONE_P2020: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P601: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P709: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P2020: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G2084_LEFT_P2020: case DXGI_COLOR_SPACE_RGB_STUDIO_G2084_NONE_P2020: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_TOPLEFT_P2020: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G2084_TOPLEFT_P2020: case DXGI_COLOR_SPACE_YCBCR_STUDIO_GHLG_TOPLEFT_P2020: case DXGI_COLOR_SPACE_RGB_STUDIO_G24_NONE_P709: case DXGI_COLOR_SPACE_RGB_STUDIO_G24_NONE_P2020: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_LEFT_P709: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_LEFT_P2020: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_TOPLEFT_P2020: return true; } return false; } 
//---------------------------------------------------------------------------------------------------------------------------------- static bool Is709ColorSpace(DXGI_COLOR_SPACE_TYPE ColorSpace) { switch (ColorSpace) { case DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709: case DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709: case DXGI_COLOR_SPACE_RGB_STUDIO_G22_NONE_P709: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P709: case DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P709: case DXGI_COLOR_SPACE_RGB_STUDIO_G24_NONE_P709: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_LEFT_P709: return true; } return false; } //---------------------------------------------------------------------------------------------------------------------------------- static bool Is2020ColorSpace(DXGI_COLOR_SPACE_TYPE ColorSpace) { switch (ColorSpace) { case DXGI_COLOR_SPACE_RGB_STUDIO_G22_NONE_P2020: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P2020: case DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P2020: case DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G2084_LEFT_P2020: case DXGI_COLOR_SPACE_RGB_STUDIO_G2084_NONE_P2020: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_TOPLEFT_P2020: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G2084_TOPLEFT_P2020: case DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P2020: case DXGI_COLOR_SPACE_YCBCR_STUDIO_GHLG_TOPLEFT_P2020: case DXGI_COLOR_SPACE_YCBCR_FULL_GHLG_TOPLEFT_P2020: case DXGI_COLOR_SPACE_RGB_STUDIO_G24_NONE_P2020: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_LEFT_P2020: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_TOPLEFT_P2020: return true; } return false; } //---------------------------------------------------------------------------------------------------------------------------------- static bool Is601ColorSpace(DXGI_COLOR_SPACE_TYPE ColorSpace) { switch (ColorSpace) { case DXGI_COLOR_SPACE_YCBCR_FULL_G22_NONE_P709_X601: case DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P601: case DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P601: return true; } return false; } 
//---------------------------------------------------------------------------------------------------------------------------------- static DXGI_COLOR_SPACE_TYPE ConvertFromLegacyColorSpace(bool RGB, UINT BitsPerElement, bool StudioRGB, bool P709, bool StudioYUV) { if (RGB) { if (BitsPerElement > 32) { // All 16 bit color channel data is assumed to be linear rather than SRGB return DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709; } else { if (StudioRGB) { return DXGI_COLOR_SPACE_RGB_STUDIO_G22_NONE_P709; } else { return DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; } } } else { if (P709) { if (StudioYUV) { return DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P709; } else { return DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P709; } } else { if (StudioYUV) { return DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P601; } else { return DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P601; } } } } //---------------------------------------------------------------------------------------------------------------------------------- static DXGI_COLOR_SPACE_TYPE GetDefaultColorSpaceFromFormat(bool RGB, UINT BitsPerElement) { if (RGB) { if (BitsPerElement > 32) { // All 16 bit color channel data is assumed to be linear rather than SRGB return DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709; } else { return DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; } } else { return DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P601; } } }; ================================================ FILE: include/DeviceChild.hpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#pragma once

namespace D3D12TranslationLayer
{
    class ImmediateContext;

    // Base for objects created through an ImmediateContext. Records, per command list type,
    // the ID of the last command list the object was used in.
    // NOTE(review): the template parameter lists in this header look truncated in this copy of
    // the file - confirm against the upstream header.
    class DeviceChild
    {
    public:
        DeviceChild(ImmediateContext* pParent) noexcept
            : m_pParent(pParent)
        {
        }

        ImmediateContext* m_pParent = nullptr;
        // Last command list ID this object was used in, indexed by COMMAND_LIST_TYPE.
        UINT64 m_LastUsedCommandListID[(UINT)COMMAND_LIST_TYPE::MAX_VALID] = {};
        bool m_bWaitForCompletionRequired = true;

        // Warning: this method is hidden in some derived child types, and is not virtual
        // Always ensure that this method is called on the most derived type.
        // Asserts that the ID never moves backwards for the given command list type.
        void UsedInCommandList(_In_range_(0, COMMAND_LIST_TYPE::MAX_VALID - 1) COMMAND_LIST_TYPE CommandListType, UINT64 CommandListID) noexcept
        {
            assert(CommandListType < COMMAND_LIST_TYPE::MAX_VALID);
            assert(CommandListID >= m_LastUsedCommandListID[(UINT)CommandListType]);
            m_LastUsedCommandListID[(UINT)CommandListType] = CommandListID;
        }

        // Like UsedInCommandList, but silently ignores IDs older than the recorded one.
        void MarkUsedInCommandListIfNewer(COMMAND_LIST_TYPE CommandListType, UINT64 CommandListID) noexcept
        {
            if (CommandListID >= m_LastUsedCommandListID[(UINT)CommandListType])
            {
                UsedInCommandList(CommandListType, CommandListID);
            }
        }

        // Forgets every recorded use.
        void ResetLastUsedInCommandList()
        {
            ZeroMemory(m_LastUsedCommandListID, sizeof(m_LastUsedCommandListID));
        }

    protected:
        // Queues the held object for deferred deletion and clears the smart pointer.
        // Does nothing when the pointer is empty.
        template void AddToDeferredDeletionQueue(unique_comptr& spObject)
        {
            if (spObject)
            {
                AddToDeferredDeletionQueue(spObject.get());
                spObject.reset();
            }
        }

        template void AddToDeferredDeletionQueue(unique_comptr& spObject, COMMAND_LIST_TYPE CommandListType, UINT64 CommandListID)
        {
            // Convert from our existing array of command list IDs into a single command list ID
            // for the specified command list type, by resetting everything and marking just the one.
            ResetLastUsedInCommandList();
            MarkUsedInCommandListIfNewer(CommandListType, CommandListID);

            AddToDeferredDeletionQueue(spObject);
        }

        // Same as above, using the current command list ID of the given type.
        template void AddToDeferredDeletionQueue(unique_comptr& spObject, COMMAND_LIST_TYPE CommandListType)
        {
            AddToDeferredDeletionQueue(spObject, CommandListType, GetCommandListID(CommandListType));
        }

        // Exchanges the recorded command list IDs with Other. Both objects are expected to share
        // the same parent and wait-for-completion setting.
        void SwapIdentities(DeviceChild& Other)
        {
            for (DWORD i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++)
            {
                std::swap(m_LastUsedCommandListID[i], Other.m_LastUsedCommandListID[i]);
            }
            assert(m_pParent == Other.m_pParent);
            assert(m_bWaitForCompletionRequired == Other.m_bWaitForCompletionRequired);
        }

        void AddToDeferredDeletionQueue(ID3D12Object* pObject);

        UINT64 GetCommandListID(COMMAND_LIST_TYPE CommandListType) noexcept;
    };

    // DeviceChild that holds a single interface pointer and queues it for deferred deletion
    // on Destroy() and on destruction.
    template class DeviceChildImpl : public DeviceChild
    {
    public:
        DeviceChildImpl(ImmediateContext* pParent) noexcept
            : DeviceChild(pParent)
        {
        }

        void Destroy() { AddToDeferredDeletionQueue(m_spIface); }
        ~DeviceChildImpl() { Destroy(); }

        bool Created() { return m_spIface.get() != nullptr; }

        // Releases any held object first, then exposes the slot for a creation call to fill.
        TIface** GetForCreate() { Destroy(); return &m_spIface; }

        // Returns the interface and records its use in the given command list.
        TIface* GetForUse(COMMAND_LIST_TYPE CommandListType, UINT64 CommandListID)
        {
            MarkUsedInCommandListIfNewer(CommandListType, CommandListID);
            return m_spIface.get();
        }
        TIface* GetForUse(COMMAND_LIST_TYPE CommandListType)
        {
            return GetForUse(CommandListType, GetCommandListID(CommandListType));
        }

        // Returns the interface without recording any command list use.
        TIface* GetForImmediateUse() { return m_spIface.get(); }

    private:
        unique_comptr m_spIface;
    };
};

================================================
FILE: include/DxbcBuilder.hpp
================================================
#pragma once
#include
//=================================================================================================================================
// CDXBCBuilder
//
// Create, fill and retrieve a DXBC (DX Blob Container).
// // Basic usage: // (1) Create CDXBCBuilder class instance using constructor (takes a parameter regarding whether to copy or allocate) // (2) For each blob of data you want to store, call AppendBlob() // (4) Finally, call either GetFinalDXBC() to retrieve the DXBC. // (5) You can start over again by calling StartNewContainer(), or just get rid of the class. // // Read comments inline below for full detail. // class CDXBCBuilder { public: // Constructor CDXBCBuilder(bool bMakeInternalCopiesOfBlobs) { // Setting bMakeInternalCopiesOfBlobs = true means that while the container is being built up // and you are passing in pointers to your blob data via AppendBlob(), this class will // allocate and make copies of the blobs you pass in. // Setting bMakeInternalCopiesOfBlobs = false means that AppendBlob() will store the // pointers you pass in, rather than copying the data. This is more efficient than // copying the data, but can only be used if you know that up until you finish building // the container by calling GetFinalDXBC*(), the pointers will continue to point to valid data. // // Note that the actual DXBC is only built up at the call to GetFinalDXBC(), when // all the blobs are traversed and copied into one contiguous memory allocation (the DXBC), // regardless of whether bMakeInternalCopiesOfBlobs is true or false. m_bMakeInternalCopiesOfBlobs = bMakeInternalCopiesOfBlobs; Init(); } ~CDXBCBuilder() { Cleanup(); } // Call to begin a new container. Don't need to call this the first time (constructor already sets it up). void StartNewContainer(); // Once a container has been started, use AppendBlob to append blobs of data to the container. // Each blob needs a fourCC to identify it (nothing wrong with adding multiple // blobs with the same fourCC though; they'll all be stored). Valid FourCCs come from the // enumeration near the top of this file. 
// The memory for the contents of the blob is copied into internally allocated storage // if the class was created with bMakeInternalCopiesOfBlobs, else direct pointers to // the data passed in are stored. // It is valid for a blob to be completely empty (BlobSize = 0) // Returns: // S_OK - On success. // E_OUTOFMEMORY - When internal memory allocation failed. // E_FAIL - If the total size of the blobs is beyond what is allowed, // or a container hasn't been started, or (BlobSize > 0 && !pBlobData), // or the BlobFourCC isn't recognized. HRESULT AppendBlob(DXBCFourCC BlobFourCC, UINT32 BlobSizeInBytes, const void *pBlobData); // Grabs a blob out of the specified parser and appends it to this builder // Returns: S_OK, S_FALSE (could not find blob), E_OUTOFMEMORY HRESULT AppendBlob(CDXBCParser *pParser, DXBCFourCC BlobFourCC); // After all blobs have been added, call GetFinalDXBC with pCallerAllocatedMemory set to NULL // to retrieve the required memory size for the final blob (output to pContainerSize). // Allocate that memory yourself, call GetFinalDXBC again passing in the memory and how much // was allocated, and the final container will be written out to the memory passed in, as long as it // is big enough, else nothing will be written). // When data is written out, the amount written is placed in pContainerSize if provided (0 if there // wasn't enough space, else the full size). 
// // Return values: S_OK, E_FAIL or E_OUTOFMEMORY (MS API hash algorithm code could run out of mem) HRESULT GetFinalDXBC(void *pCallerAllocatedMemory, UINT32 *pContainerSize); private: typedef struct BlobNode { DXBCBlobHeader BlobHeader; const void *pBlobData; BlobNode *pNext; } BlobNode; bool m_bMakeInternalCopiesOfBlobs; UINT32 m_TotalOutputContainerSize; // to check against DXBC_MAX_SIZE_IN_BYTES UINT32 m_BlobCount; BlobNode *m_pFirstBlob; BlobNode *m_pLastBlob; void Init(); void Cleanup(); }; ================================================ FILE: include/Fence.hpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once namespace D3D12TranslationLayer { enum FENCE_FLAGS { FENCE_FLAG_NONE = 0x0, FENCE_FLAG_SHARED = 0x1, FENCE_FLAG_SHARED_CROSS_ADAPTER = 0x2, FENCE_FLAG_NON_MONITORED = 0x4, FENCE_FLAG_DEFERRED_WAITS = 0x8, }; DEFINE_ENUM_FLAG_OPERATORS(FENCE_FLAGS); class Fence : public DeviceChild { public: Fence(ImmediateContext* pParent, FENCE_FLAGS Flags, UINT64 InitialValue); Fence(ImmediateContext* pParent, HANDLE SharedHandle); Fence(ImmediateContext* pParent, ID3D12Fence* pFence); Fence(Fence const&) = delete; Fence& operator=(Fence const&) = delete; Fence(Fence&&) = delete; Fence& operator=(Fence&&) = delete; ~Fence(); UINT64 TRANSLATION_API GetCompletedValue() const { return m_spFence->GetCompletedValue(); } void TRANSLATION_API Signal(UINT64 Value) const { ThrowFailure(m_spFence->Signal(Value)); } HRESULT TRANSLATION_API SetEventOnCompletion(UINT64 Value, HANDLE hEvent) const { return m_spFence->SetEventOnCompletion(Value, hEvent); } HRESULT TRANSLATION_API CreateSharedHandle( _In_opt_ const SECURITY_ATTRIBUTES *pAttributes, _In_ DWORD dwAccess, _In_opt_ LPCWSTR lpName, _Out_ HANDLE *pHandle); bool TRANSLATION_API IsMonitored() const; bool DeferredWaits() const { return m_bDeferredWaits; } ID3D12Fence1* Get() const { return m_spFence.get(); } private: unique_comptr 
m_spFence; bool m_bDeferredWaits = false; }; } ================================================ FILE: include/FormatDesc.hpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once #define D3DFORMATDESC 1 #define MAP_ALIGN_REQUIREMENT 16 // Map is required to return 16-byte aligned addresses // ---------------------------------------------------------------------------- // Some enumerations used in the D3D11_FORMAT_DETAIL structure // ---------------------------------------------------------------------------- typedef enum D3D11_FORMAT_LAYOUT { D3D11FL_STANDARD = 0, // standard layout D3D11FL_CUSTOM = -1 // custom layout // Note, 1 bit allocated for this in FORMAT_DETAIL below. If you add fields here, add bits... // NOTE SIGNED VALUES ARE USED SINCE COMPILER MAKES ENUMS SIGNED, AND BITFIELDS ARE SIGN EXTENDED ON READ } D3D11_FORMAT_LAYOUT; typedef enum D3D11_FORMAT_TYPE_LEVEL { D3D11FTL_NO_TYPE = 0, D3D11FTL_PARTIAL_TYPE = -2, D3D11FTL_FULL_TYPE = -1, // Note, 2 bits allocated for this in FORMAT_DETAIL below. If you add fields here, add bits... // NOTE SIGNED VALUES ARE USED SINCE COMPILER MAKES ENUMS SIGNED, AND BITFIELDS ARE SIGN EXTENDED ON READ } D3D11_FORMAT_TYPE_LEVEL; typedef enum D3D11_FORMAT_COMPONENT_NAME { D3D11FCN_R = -4, D3D11FCN_G = -3, D3D11FCN_B = -2, D3D11FCN_A = -1, D3D11FCN_D = 0, D3D11FCN_S = 1, D3D11FCN_X = 2, // Note, 3 bits allocated for this in FORMAT_DETAIL below. If you add fields here, add bits... // NOTE SIGNED VALUES ARE USED SINCE COMPILER MAKES ENUMS SIGNED, AND BITFIELDS ARE SIGN EXTENDED ON READ } D3D11_FORMAT_COMPONENT_NAME; typedef enum D3D11_FORMAT_COMPONENT_INTERPRETATION { D3D11FCI_TYPELESS = 0, D3D11FCI_FLOAT = -4, D3D11FCI_SNORM = -3, D3D11FCI_UNORM = -2, D3D11FCI_SINT = -1, D3D11FCI_UINT = 1, D3D11FCI_UNORM_SRGB = 2, D3D11FCI_BIASED_FIXED_2_8 = 3, // Note, 3 bits allocated for this in FORMAT_DETAIL below. If you add fields here, add bits... 
// NOTE SIGNED VALUES ARE USED SINCE COMPILER MAKES ENUMS SIGNED, AND BITFIELDS ARE SIGN EXTENDED ON READ } D3D11_FORMAT_COMPONENT_INTERPRETATION; // ---------------------------------------------------------------------------- // // CD3D11FormatHelper // // ---------------------------------------------------------------------------- class CD3D11FormatHelper { private: // ---------------------------------------------------------------------------- // Information describing everything about a D3D11 Resource Format // ---------------------------------------------------------------------------- // This struct holds information about formats that is feature level and driver version agnostic typedef struct FORMAT_DETAIL { DXGI_FORMAT DXGIFormat; DXGI_FORMAT ParentFormat; const DXGI_FORMAT* pDefaultFormatCastSet; // This is dependent on FL/driver version, but is here to save a lot of space UINT8 BitsPerComponent[4]; // only used for D3D11FTL_PARTIAL_TYPE or FULL_TYPE UINT8 BitsPerUnit; // BitsPerUnit is bits per pixel for non-compressed formats and bits per block for compressed formats BOOL SRGBFormat : 1; UINT WidthAlignment : 4; // number of texels to align to in a mip level. UINT HeightAlignment : 4; // Top level dimensions must be a multiple of these UINT DepthAlignment : 1; // values. D3D11_FORMAT_LAYOUT Layout : 1; D3D11_FORMAT_TYPE_LEVEL TypeLevel : 2; D3D11_FORMAT_COMPONENT_NAME ComponentName0 : 3; // RED ... only used for D3D11FTL_PARTIAL_TYPE or FULL_TYPE D3D11_FORMAT_COMPONENT_NAME ComponentName1 : 3; // GREEN ... only used for D3D11FTL_PARTIAL_TYPE or FULL_TYPE D3D11_FORMAT_COMPONENT_NAME ComponentName2 : 3; // BLUE ... only used for D3D11FTL_PARTIAL_TYPE or FULL_TYPE D3D11_FORMAT_COMPONENT_NAME ComponentName3 : 3; // ALPHA ... 
only used for D3D11FTL_PARTIAL_TYPE or FULL_TYPE D3D11_FORMAT_COMPONENT_INTERPRETATION ComponentInterpretation0 : 3; // only used for D3D11FTL_FULL_TYPE D3D11_FORMAT_COMPONENT_INTERPRETATION ComponentInterpretation1 : 3; // only used for D3D11FTL_FULL_TYPE D3D11_FORMAT_COMPONENT_INTERPRETATION ComponentInterpretation2 : 3; // only used for D3D11FTL_FULL_TYPE D3D11_FORMAT_COMPONENT_INTERPRETATION ComponentInterpretation3 : 3; // only used for D3D11FTL_FULL_TYPE bool bPlanar : 1; bool bYUV : 1; } FORMAT_DETAIL; static const FORMAT_DETAIL s_FormatDetail[]; static const LPCSTR s_FormatNames[]; // separate from above structure so it can be compiled out of runtime. static const UINT s_NumFormats; public: static bool IsBlockCompressFormat(DXGI_FORMAT Format); static UINT GetByteAlignment(DXGI_FORMAT Format); static HRESULT CalculateResourceSize(UINT width, UINT height, UINT depth, DXGI_FORMAT format, UINT mipLevels, UINT subresources, _Out_ SIZE_T& totalByteSize, _Out_writes_opt_(subresources) D3D11_MAPPED_SUBRESOURCE *pDst = nullptr); static HRESULT CalculateExtraPlanarRows(DXGI_FORMAT format, UINT plane0Height, _Out_ UINT& totalHeight); static HRESULT CalculateMinimumRowMajorRowPitch(DXGI_FORMAT Format, UINT Width, _Out_ UINT& RowPitch); static HRESULT CalculateMinimumRowMajorSlicePitch(DXGI_FORMAT Format, UINT ContextBasedRowPitch, UINT Height, _Out_ UINT& SlicePitch); static bool IsSRGBFormat(DXGI_FORMAT Format); static UINT GetNumComponentsInFormat( DXGI_FORMAT Format ); // Converts the sequential component index (range from 0 to GetNumComponentsInFormat()) to // the absolute component index (range 0 to 3). static DXGI_FORMAT GetParentFormat(DXGI_FORMAT Format); static const DXGI_FORMAT* GetFormatCastSet(DXGI_FORMAT Format); static D3D11_FORMAT_TYPE_LEVEL GetTypeLevel(DXGI_FORMAT Format); static UINT GetBitsPerUnit(DXGI_FORMAT Format); static UINT GetBitsPerElement(DXGI_FORMAT Format); // Legacy function used to support D3D10on9 only. Do not use. 
static UINT GetWidthAlignment(DXGI_FORMAT Format); static UINT GetHeightAlignment(DXGI_FORMAT Format); static UINT GetDepthAlignment(DXGI_FORMAT Format); static D3D11_FORMAT_COMPONENT_NAME GetComponentName(DXGI_FORMAT Format, UINT AbsoluteComponentIndex); static UINT GetBitsPerComponent(DXGI_FORMAT Format, UINT AbsoluteComponentIndex); static D3D11_FORMAT_COMPONENT_INTERPRETATION GetFormatComponentInterpretation(DXGI_FORMAT Format, UINT AbsoluteComponentIndex); static BOOL Planar(DXGI_FORMAT Format); static BOOL NonOpaquePlanar(DXGI_FORMAT Format); static BOOL YUV(DXGI_FORMAT Format); static BOOL Opaque(DXGI_FORMAT Format) { return Format == DXGI_FORMAT_420_OPAQUE; } static void GetTileShape(D3D11_TILE_SHAPE* pTileShape, DXGI_FORMAT Format, D3D11_RESOURCE_DIMENSION Dimension, UINT SampleCount); static bool FamilySupportsStencil(DXGI_FORMAT Format); static void GetYCbCrChromaSubsampling(DXGI_FORMAT Format, _Out_ UINT& HorizontalSubsampling, _Out_ UINT& VerticalSubsampling); static UINT NonOpaquePlaneCount(DXGI_FORMAT Format); protected: static UINT GetDetailTableIndex(DXGI_FORMAT Format); static UINT GetDetailTableIndexNoThrow(DXGI_FORMAT Format); private: static const FORMAT_DETAIL* GetFormatDetail( DXGI_FORMAT Format ); }; // End of file ================================================ FILE: include/ImmediateContext.hpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#pragma once namespace D3D12TranslationLayer { class Resource; class CommandListManager; struct TranslationLayerCallbacks { std::function m_pfnPostSubmit; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // A pool of objects that are recycled on specific fence values // This class assumes single threaded caller //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template class CFencePool { public: void ReturnToPool(TResourceType&& Resource, UINT64 FenceValue) noexcept { try { auto lock = m_pLock ? std::unique_lock(*m_pLock) : std::unique_lock(); m_Pool.emplace_back(FenceValue, std::move(Resource)); // throw( bad_alloc ) } catch (std::bad_alloc&) { // Just drop the error // All uses of this pool use unique_comptr, which will release the resource } } template TResourceType RetrieveFromPool(UINT64 CurrentFenceValue, PFNCreateNew pfnCreateNew, const CreationArgType&... CreationArgs) noexcept(false) { auto lock = m_pLock ? std::unique_lock(*m_pLock) : std::unique_lock(); auto Head = m_Pool.begin(); if (Head == m_Pool.end() || (CurrentFenceValue < Head->first)) { return std::move(pfnCreateNew(CreationArgs...)); // throw( _com_error ) } assert(Head->second); TResourceType ret = std::move(Head->second); m_Pool.erase(Head); return std::move(ret); } void Trim(UINT64 TrimThreshold, UINT64 CurrentFenceValue) { auto lock = m_pLock ? std::unique_lock(*m_pLock) : std::unique_lock(); auto Head = m_Pool.begin(); if (Head == m_Pool.end() || (CurrentFenceValue < Head->first)) { return; } UINT64 difference = CurrentFenceValue - Head->first; if (difference >= TrimThreshold) { // only erase one item per 'pump' assert(Head->second); m_Pool.erase(Head); } } CFencePool(bool bLock = false) noexcept : m_pLock(bLock ? 
new std::mutex : nullptr) { } CFencePool(CFencePool &&other) noexcept { m_Pool = std::move(other.m_Pool); m_pLock = std::move(other.m_pLock); } CFencePool& operator=(CFencePool &&other) noexcept { m_Pool = std::move(other.m_Pool); m_pLock = std::move(other.m_pLock); return *this; } protected: typedef std::pair TPoolEntry; typedef std::list TPool; CFencePool(CFencePool const& other) = delete; CFencePool& operator=(CFencePool const& other) = delete; protected: TPool m_Pool; std::unique_ptr m_pLock; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // A pool of objects that are recycled on specific fence values // with a maximum depth before blocking on RetrieveFromPool // This class assumes single threaded caller //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template class CBoundedFencePool : public CFencePool { public: template TResourceType RetrieveFromPool(UINT64 CurrentFenceValue, PFNWaitForFenceValue pfnWaitForFenceValue, PFNCreateNew pfnCreateNew, const CreationArgType&... CreationArgs) noexcept(false) { auto lock = this->m_pLock ? 
std::unique_lock(*this->m_pLock) : std::unique_lock(); auto Head = this->m_Pool.begin(); if (Head == this->m_Pool.end()) { return std::move(pfnCreateNew(CreationArgs...)); // throw( _com_error ) } else if (CurrentFenceValue < Head->first) { if (this->m_Pool.size() < m_MaxInFlightDepth) { return std::move(pfnCreateNew(CreationArgs...)); // throw( _com_error ) } else { pfnWaitForFenceValue(Head->first); // throw( _com_error ) } } assert(Head->second); TResourceType ret = std::move(Head->second); this->m_Pool.erase(Head); return std::move(ret); } CBoundedFencePool(bool bLock = false, UINT MaxInFlightDepth = UINT_MAX) noexcept : CFencePool(bLock), m_MaxInFlightDepth(MaxInFlightDepth) { } CBoundedFencePool(CBoundedFencePool&& other) noexcept : CFencePool(other), m_MaxInFlightDepth(other.m_MaxInFlightDepth) { } CBoundedFencePool& operator=(CBoundedFencePool&& other) noexcept { this->m_Pool = std::move(other.m_Pool); this->m_pLock = std::move(other.m_pLock); m_MaxInFlightDepth = other.m_MaxInFlightDepth; return *this; } protected: UINT m_MaxInFlightDepth; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Multi-level pool (for dynamic resource data upload) // This class is free-threaded (to enable D3D11 free-threaded resource destruction) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// template class CMultiLevelPool { public: CMultiLevelPool(UINT64 TrimThreshold, bool bLock) : m_Lock(bLock) , m_TrimThreshold(TrimThreshold) { } void ReturnToPool(UINT64 Size, TResourceType&& Resource, UINT64 FenceValue) noexcept { UINT PoolIndex = IndexFromSize(Size); auto Lock = m_Lock.TakeLock(); if (PoolIndex >= m_MultiPool.size()) { m_MultiPool.resize(PoolIndex + 1); } m_MultiPool[PoolIndex].ReturnToPool(std::move(Resource), FenceValue); } template TResourceType RetrieveFromPool(UINT64 Size, UINT64 CurrentFenceValue, 
PFNCreateNew pfnCreateNew) noexcept(false) { UINT PoolIndex = IndexFromSize(Size); UINT AlignedSize = (PoolIndex + 1) * ResourceSizeMultiple; auto Lock = m_Lock.TakeLock(); if (PoolIndex >= m_MultiPool.size()) { // pfnCreateNew might be expensive, and won't touch the data structure if (Lock.owns_lock()) { Lock.unlock(); } return std::move(pfnCreateNew(AlignedSize)); // throw( _com_error ) } ASSUME(PoolIndex < m_MultiPool.size()); // Note that RetrieveFromPool can call pfnCreateNew // m_Lock will be held during this potentially slow operation // This is not optimized because it is expected that once an app reaches steady-state // behavior, the pool will not need to grow. return std::move(m_MultiPool[PoolIndex].RetrieveFromPool(CurrentFenceValue, pfnCreateNew, AlignedSize)); // throw( _com_error ) } void Trim(UINT64 CurrentFenceValue) { auto Lock = m_Lock.TakeLock(); for (TPool& pool : m_MultiPool) { pool.Trim(m_TrimThreshold, CurrentFenceValue); } } protected: UINT IndexFromSize(UINT64 Size) noexcept { return (Size == 0) ? 
0 : (UINT)((Size - 1) / ResourceSizeMultiple); } protected: typedef CFencePool TPool; typedef std::vector TMultiPool; protected: TMultiPool m_MultiPool; OptLock<> m_Lock; UINT64 m_TrimThreshold; }; typedef CMultiLevelPool, 64*1024> TDynamicBufferPool; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Fenced Ring Buffer // A simple ring buffer which keeps track of allocations on the GPU time line //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class CFencedRingBuffer { public: CFencedRingBuffer(UINT32 Size = 0) : m_Size(Size) , m_Head(Size) , m_Ledger{} {} HRESULT Allocate(UINT32 NumItems, UINT64 CurrentFenceValue, _Out_ UINT32& OffsetOut) { assert(m_Size > 0); assert(NumItems < m_Size / 2); if (NumItems == 0) { OffsetOut = DereferenceTail(); return S_OK; } if (CurrentFenceValue > GetCurrentLedgeEntry().m_FenceValue) { if (FAILED(MoveToNextLedgerEntry(CurrentFenceValue))) { return E_FAIL; } } UINT64 tailLocation = DereferenceTail(); // Allocations need to be contiguous if (tailLocation + NumItems > m_Size) { UINT64 remainder = m_Size - tailLocation; UINT32 dummy = 0; // Throw away the difference so we can allocate a contiguous block if (FAILED(Allocate(UINT32(remainder), CurrentFenceValue, dummy))) { return E_FAIL; } } if (m_Tail + NumItems <= m_Head) { // The tail could have moved due to alignment so deref again OffsetOut = DereferenceTail(); GetCurrentLedgeEntry().m_NumAllocations += NumItems; m_Tail += NumItems; return S_OK; } else { OffsetOut = UINT32(-1); return E_FAIL; } } void Deallocate(UINT64 CompletedFenceValue) { for (size_t i = 0; i < _countof(m_Ledger); i++) { LedgerEntry& entry = m_Ledger[i]; const UINT32 bit = (1 << i); if ((m_LedgerMask & bit) && entry.m_FenceValue <= CompletedFenceValue) { // Dealloc m_Head += entry.m_NumAllocations; entry = {}; // Unset the bit m_LedgerMask &= 
~(bit); } if (m_LedgerMask == 0) { break; } } } private: inline UINT32 DereferenceTail() const { return m_Tail % m_Size; } UINT64 m_Head = 0; UINT64 m_Tail = 0; UINT32 m_Size; struct LedgerEntry { UINT64 m_FenceValue; UINT32 m_NumAllocations; }; // TODO: If we define a max lag between CPU and GPU this should be set to slightly more than that static const UINT32 cLedgerSize = 16; LedgerEntry m_Ledger[cLedgerSize]; UINT32 m_LedgerMask = 0x1; static_assert(cLedgerSize <= std::numeric_limits::digits); UINT32 m_LedgerIndex = 0; LedgerEntry& GetCurrentLedgeEntry() { return m_Ledger[m_LedgerIndex]; } bool IsLedgerEntryAvailable(UINT32 Index) const { return (m_LedgerMask & (1 << Index)) == 0; } HRESULT MoveToNextLedgerEntry(UINT64 CurrentFenceValue) { m_LedgerIndex++; m_LedgerIndex %= cLedgerSize; if (IsLedgerEntryAvailable(m_LedgerIndex)) { m_LedgerMask |= (1 << m_LedgerIndex); GetCurrentLedgeEntry().m_NumAllocations = 0; GetCurrentLedgeEntry().m_FenceValue = CurrentFenceValue; return S_OK; } else { return E_FAIL; } } }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Descriptor heap manager // Used to allocate descriptors from CPU-only heaps corresponding to view/sampler objects //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// class CDescriptorHeapManager { public: // Types typedef D3D12_CPU_DESCRIPTOR_HANDLE HeapOffset; typedef decltype(HeapOffset::ptr) HeapOffsetRaw; typedef UINT HeapIndex; private: // Types struct SFreeRange { HeapOffsetRaw Start; HeapOffsetRaw End; }; struct SHeapEntry { unique_comptr m_Heap; std::list m_FreeList; SHeapEntry() { } SHeapEntry(SHeapEntry &&o) : m_Heap(std::move(o.m_Heap)), m_FreeList(std::move(o.m_FreeList)) { } }; // Note: This data structure relies on the pointer validity guarantee of std::deque, // that as long as inserts/deletes are only on either end of the 
container, pointers // to elements continue to be valid. If trimming becomes an option, the free heap // list must be re-generated at that time. typedef std::deque THeapMap; public: // Methods CDescriptorHeapManager(ID3D12Device* pDevice, D3D12_DESCRIPTOR_HEAP_TYPE Type, UINT NumDescriptorsPerHeap, bool bLockRequired, UINT NodeMask) noexcept : m_Desc( { Type, NumDescriptorsPerHeap, D3D12_DESCRIPTOR_HEAP_FLAG_NONE, NodeMask} ) , m_DescriptorSize(pDevice->GetDescriptorHandleIncrementSize(Type)) , m_pDevice(pDevice) , m_CritSect(bLockRequired) { } HeapOffset AllocateHeapSlot(_Out_opt_ HeapIndex *outIndex = nullptr) noexcept(false) { auto Lock = m_CritSect.TakeLock(); if (m_FreeHeaps.empty()) { AllocateHeap(); // throw( _com_error ) } assert(!m_FreeHeaps.empty()); HeapIndex index = m_FreeHeaps.front(); SHeapEntry &HeapEntry = m_Heaps[index]; assert(!HeapEntry.m_FreeList.empty()); SFreeRange &Range = *HeapEntry.m_FreeList.begin(); HeapOffset Ret = { Range.Start }; Range.Start += m_DescriptorSize; if (Range.Start == Range.End) { HeapEntry.m_FreeList.pop_front(); if (HeapEntry.m_FreeList.empty()) { m_FreeHeaps.pop_front(); } } if (outIndex) { *outIndex = index; } return Ret; } void FreeHeapSlot(HeapOffset Offset, HeapIndex index) noexcept { auto Lock = m_CritSect.TakeLock(); try { assert(index < m_Heaps.size()); SHeapEntry &HeapEntry = m_Heaps[index]; SFreeRange NewRange = { Offset.ptr, Offset.ptr + m_DescriptorSize }; bool bFound = false; for (auto it = HeapEntry.m_FreeList.begin(), end = HeapEntry.m_FreeList.end(); it != end && !bFound; ++it) { SFreeRange &Range = *it; assert(Range.Start <= Range.End); if (Range.Start == Offset.ptr + m_DescriptorSize) { Range.Start = Offset.ptr; bFound = true; } else if (Range.End == Offset.ptr) { Range.End += m_DescriptorSize; bFound = true; } else { assert(Range.End < Offset.ptr || Range.Start > Offset.ptr); if (Range.Start > Offset.ptr) { HeapEntry.m_FreeList.insert(it, NewRange); // throw( bad_alloc ) bFound = true; } } } if 
(!bFound) { if (HeapEntry.m_FreeList.empty()) { m_FreeHeaps.push_back(index); // throw( bad_alloc ) } HeapEntry.m_FreeList.push_back(NewRange); // throw( bad_alloc ) } } catch( std::bad_alloc& ) { // Do nothing - there will be slots that can no longer be reclaimed. } } private: // Methods void AllocateHeap() noexcept(false) { SHeapEntry NewEntry; ThrowFailure( m_pDevice->CreateDescriptorHeap(&m_Desc, IID_PPV_ARGS(&NewEntry.m_Heap)) ); // throw( _com_error ) HeapOffset HeapBase = NewEntry.m_Heap->GetCPUDescriptorHandleForHeapStart(); NewEntry.m_FreeList.push_back({HeapBase.ptr, HeapBase.ptr + m_Desc.NumDescriptors * m_DescriptorSize}); // throw( bad_alloc ) m_Heaps.emplace_back(std::move(NewEntry)); // throw( bad_alloc ) m_FreeHeaps.push_back(static_cast(m_Heaps.size() - 1)); // throw( bad_alloc ) } private: // Members const D3D12_DESCRIPTOR_HEAP_DESC m_Desc; const UINT m_DescriptorSize; ID3D12Device* const m_pDevice; // weak-ref OptLock<> m_CritSect; THeapMap m_Heaps; std::list m_FreeHeaps; }; // Extra data appended to the end of stream-output buffers struct SStreamOutputSuffix { UINT BufferFilledSize; UINT VertexCountPerInstance; UINT InstanceCount; UINT StartVertexLocation; UINT StartInstanceLocation; }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Core implementation //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// enum EDirtyBits : UINT64 { // Pipeline states: // Dirty bits are set when a shader or constant in the PSO desc changes, and causes a PSO lookup/compile // Reassert bits are set on command list boundaries, on graphics/compute boundaries, and after dirty processing e_PipelineStateDirty = 0x1, // Heap-based bindings: // Dirty bits are set when API bindings change, or on command list boundaries (to enable optimizations), // and causes heap slots to be allocated, and descriptors to be 
copied // Reassert bits are set after dirty processing e_VSShaderResourcesDirty = 0x4, e_VSConstantBuffersDirty = 0x8, e_VSSamplersDirty = 0x10, e_PSShaderResourcesDirty = 0x20, e_PSConstantBuffersDirty = 0x40, e_PSSamplersDirty = 0x80, e_GSShaderResourcesDirty = 0x100, e_GSConstantBuffersDirty = 0x200, e_GSSamplersDirty = 0x400, e_HSShaderResourcesDirty = 0x800, e_HSConstantBuffersDirty = 0x1000, e_HSSamplersDirty = 0x2000, e_DSShaderResourcesDirty = 0x4000, e_DSConstantBuffersDirty = 0x8000, e_DSSamplersDirty = 0x10000, e_CSShaderResourcesDirty = 0x20000, e_CSConstantBuffersDirty = 0x40000, e_CSSamplersDirty = 0x80000, e_UnorderedAccessViewsDirty = 0x100000, e_CSUnorderedAccessViewsDirty = 0x200000, // Non-heap-based (graphics) bindings: // Dirty bits are not used // Reassert bits are set on command list boundaries, prevent incremental bindings, and cause the full binding space to be set on PreDraw // Note: incremental bindings only applies to VB and SO e_RenderTargetsDirty = 0x400000, e_StreamOutputDirty = 0x800000, e_VertexBuffersDirty = 0x1000000, e_IndexBufferDirty = 0x2000000, // Fixed-function knobs: // Dirty bits are not used // Reassert bits are set on command list boundaries, are unset during update DDI invocations, and cause the ImmCtx pipeline state to be applied e_BlendFactorDirty = 0x4000000, e_StencilRefDirty = 0x8000000, e_PrimitiveTopologyDirty = 0x10000000, e_ViewportsDirty = 0x20000000, e_ScissorRectsDirty = 0x40000000, e_PredicateDirty = 0x80000000, e_FirstDraw = 0x100000000, e_FirstDispatch = 0x200000000, e_GraphicsRootSignatureDirty = 0x400000000, e_ComputeRootSignatureDirty = 0x800000000, // Bit combinations e_VSBindingsDirty = e_VSShaderResourcesDirty | e_VSConstantBuffersDirty | e_VSSamplersDirty, e_PSBindingsDirty = e_PSShaderResourcesDirty | e_PSConstantBuffersDirty | e_PSSamplersDirty, e_GSBindingsDirty = e_GSShaderResourcesDirty | e_GSConstantBuffersDirty | e_GSSamplersDirty, e_HSBindingsDirty = e_HSShaderResourcesDirty | 
e_HSConstantBuffersDirty | e_HSSamplersDirty, e_DSBindingsDirty = e_DSShaderResourcesDirty | e_DSConstantBuffersDirty | e_DSSamplersDirty, // Combinations of Heap-based bindings, by pipeline type e_GraphicsBindingsDirty = e_VSBindingsDirty | e_PSBindingsDirty | e_GSBindingsDirty | e_DSBindingsDirty | e_HSBindingsDirty | e_UnorderedAccessViewsDirty, e_ComputeBindingsDirty = e_CSShaderResourcesDirty | e_CSConstantBuffersDirty | e_CSSamplersDirty | e_CSUnorderedAccessViewsDirty, // Combinations of heap-based bindings, by heap type e_ViewsDirty = e_VSShaderResourcesDirty | e_VSConstantBuffersDirty | e_PSShaderResourcesDirty | e_PSConstantBuffersDirty | e_GSShaderResourcesDirty | e_GSConstantBuffersDirty | e_HSShaderResourcesDirty | e_HSConstantBuffersDirty | e_DSShaderResourcesDirty | e_DSConstantBuffersDirty | e_CSShaderResourcesDirty | e_CSConstantBuffersDirty | e_UnorderedAccessViewsDirty | e_CSUnorderedAccessViewsDirty, e_SamplersDirty = e_VSSamplersDirty | e_PSSamplersDirty | e_GSSamplersDirty | e_HSSamplersDirty | e_DSSamplersDirty | e_CSSamplersDirty, // All heap-based bindings e_HeapBindingsDirty = e_GraphicsBindingsDirty | e_ComputeBindingsDirty, // (Mostly) graphics-only bits, except for predicate e_NonHeapBindingsDirty = e_RenderTargetsDirty | e_StreamOutputDirty | e_VertexBuffersDirty | e_IndexBufferDirty, e_FixedFunctionDirty = e_BlendFactorDirty | e_StencilRefDirty | e_PrimitiveTopologyDirty | e_ViewportsDirty | e_ScissorRectsDirty | e_PredicateDirty, // All state bits by pipeline type e_GraphicsStateDirty = e_PipelineStateDirty | e_GraphicsBindingsDirty | e_NonHeapBindingsDirty | e_FixedFunctionDirty | e_FirstDraw | e_GraphicsRootSignatureDirty, e_ComputeStateDirty = e_PipelineStateDirty | e_ComputeBindingsDirty | e_FirstDispatch | e_ComputeRootSignatureDirty, // Accumulations of state bits set on command list boundaries and initialization // New command lists require all state to be reasserted, but nothing new needs to be dirtied. 
// The first command list associated with a device must treat all heaps as dirty // to setup initial descriptor tables e_DirtyOnNewCommandList = 0, e_DirtyOnFirstCommandList = e_HeapBindingsDirty, e_ReassertOnNewCommandList = e_GraphicsStateDirty | e_ComputeStateDirty, }; class ImmediateContext; struct RetiredObject { RetiredObject() {} RetiredObject(COMMAND_LIST_TYPE CommandListType, UINT64 lastCommandListID, bool completionRequired, std::vector deferredWaits = std::vector()) : m_completionRequired(completionRequired), m_deferredWaits(std::move(deferredWaits)) { m_lastCommandListIDs[(UINT)CommandListType] = lastCommandListID; } RetiredObject(const UINT64 lastCommandListIDs[(UINT)COMMAND_LIST_TYPE::MAX_VALID], bool completionRequired, std::vector deferredWaits = std::vector()) : m_completionRequired(completionRequired), m_deferredWaits(std::move(deferredWaits)) { for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++) { m_lastCommandListIDs[i] = lastCommandListIDs[i]; } } static bool ReadyToDestroy(ImmediateContext* pContext, bool completionRequired, UINT64 lastCommandListID, COMMAND_LIST_TYPE CommandListType, const std::vector& deferredWaits = std::vector()); static bool ReadyToDestroy(ImmediateContext* pContext, bool completionRequired, const UINT64 lastCommandListIDs[(UINT)COMMAND_LIST_TYPE::MAX_VALID], const std::vector& deferredWaits = std::vector()); static bool DeferredWaitsSatisfied(const std::vector& deferredWaits); bool ReadyToDestroy(ImmediateContext* pContext) { return ReadyToDestroy(pContext, m_completionRequired, m_lastCommandListIDs, m_deferredWaits); } UINT64 m_lastCommandListIDs[(UINT)COMMAND_LIST_TYPE::MAX_VALID] = {}; bool m_completionRequired = false; std::vector m_deferredWaits; }; struct RetiredD3D12Object : public RetiredObject { RetiredD3D12Object() {} RetiredD3D12Object(ID3D12Object* pUnderlying, _In_opt_ std::unique_ptr &&pResidencyHandle, COMMAND_LIST_TYPE CommandListType, UINT64 lastCommandListID, bool completionRequired, 
std::vector deferredWaits) : RetiredObject(CommandListType, lastCommandListID, completionRequired, std::move(deferredWaits)) , m_pUnderlying(pUnderlying) , m_pResidencyHandle(std::move(pResidencyHandle)) {} RetiredD3D12Object(ID3D12Object* pUnderlying, _In_opt_ std::unique_ptr &&pResidencyHandle, const UINT64 lastCommandListIDs[(UINT)COMMAND_LIST_TYPE::MAX_VALID], bool completionRequired, std::vector deferredWaits) : RetiredObject(lastCommandListIDs, completionRequired, std::move(deferredWaits)) , m_pUnderlying(pUnderlying) , m_pResidencyHandle(std::move(pResidencyHandle)) {} RetiredD3D12Object(RetiredD3D12Object &&retiredObject) : RetiredObject(retiredObject) , m_pUnderlying(retiredObject.m_pUnderlying) , m_pResidencyHandle(std::move(retiredObject.m_pResidencyHandle)) {} CComPtr m_pUnderlying; std::unique_ptr m_pResidencyHandle; }; typedef ConditionalAllocator ConditionalHeapAllocator; struct RetiredSuballocationBlock : public RetiredObject { RetiredSuballocationBlock(HeapSuballocationBlock &block, ConditionalHeapAllocator &parentAllocator, COMMAND_LIST_TYPE CommandListType, UINT64 lastCommandListID) : RetiredObject(CommandListType, lastCommandListID, true) , m_SuballocatedBlock(block) , m_ParentAllocator(parentAllocator) {} RetiredSuballocationBlock(HeapSuballocationBlock &block, ConditionalHeapAllocator &parentAllocator, const UINT64 lastCommandListIDs[(UINT)COMMAND_LIST_TYPE::MAX_VALID]) : RetiredObject(lastCommandListIDs, true) , m_SuballocatedBlock(block) , m_ParentAllocator(parentAllocator) {} void Destroy() { m_ParentAllocator.Deallocate(m_SuballocatedBlock); } HeapSuballocationBlock m_SuballocatedBlock; ConditionalHeapAllocator &m_ParentAllocator; }; class DeferredDeletionQueueManager { public: DeferredDeletionQueueManager(ImmediateContext *pContext) : m_pParent(pContext) {} ~DeferredDeletionQueueManager() { TrimDeletedObjects(true); } bool TrimDeletedObjects(bool deviceBeingDestroyed = false); bool 
GetFenceValuesForObjectDeletion(UINT64(&FenceValues)[(UINT)COMMAND_LIST_TYPE::MAX_VALID]); bool GetFenceValuesForSuballocationDeletion(UINT64(&FenceValues)[(UINT)COMMAND_LIST_TYPE::MAX_VALID]); void AddObjectToQueue(ID3D12Object* pUnderlying, std::unique_ptr &&pResidencyHandle, COMMAND_LIST_TYPE CommandListType, UINT64 lastCommandListID, bool completionRequired, std::vector deferredWaits = std::vector()) { m_DeferredObjectDeletionQueue.push(RetiredD3D12Object(pUnderlying, std::move(pResidencyHandle), CommandListType, lastCommandListID, completionRequired, std::move(deferredWaits))); } void AddObjectToQueue(ID3D12Object* pUnderlying, std::unique_ptr &&pResidencyHandle, const UINT64 lastCommandListIDs[(UINT)COMMAND_LIST_TYPE::MAX_VALID], bool completionRequired, std::vector deferredWaits = std::vector()) { m_DeferredObjectDeletionQueue.push(RetiredD3D12Object(pUnderlying, std::move(pResidencyHandle), lastCommandListIDs, completionRequired, std::move(deferredWaits))); } void AddSuballocationToQueue(HeapSuballocationBlock &suballocation, ConditionalHeapAllocator &parentAllocator, COMMAND_LIST_TYPE CommandListType, UINT64 lastCommandListID) { RetiredSuballocationBlock retiredSuballocation(suballocation, parentAllocator, CommandListType, lastCommandListID); if (!retiredSuballocation.ReadyToDestroy(m_pParent)) { m_DeferredSuballocationDeletionQueue.push(retiredSuballocation); } else { retiredSuballocation.Destroy(); } } void AddSuballocationToQueue(HeapSuballocationBlock &suballocation, ConditionalHeapAllocator &parentAllocator, const UINT64 lastCommandListIDs[(UINT)COMMAND_LIST_TYPE::MAX_VALID]) { RetiredSuballocationBlock retiredSuballocation(suballocation, parentAllocator, lastCommandListIDs); if (!retiredSuballocation.ReadyToDestroy(m_pParent)) { m_DeferredSuballocationDeletionQueue.push(retiredSuballocation); } else { retiredSuballocation.Destroy(); } } private: bool SuballocationsReadyToBeDestroyed(bool deviceBeingDestroyed); ImmediateContext* m_pParent; std::queue 
m_DeferredObjectDeletionQueue; std::queue m_DeferredSuballocationDeletionQueue; }; template class COptLockedContainer { OptLock m_CS; T m_Obj; public: class LockedAccess { std::unique_lock m_Lock; T& m_Obj; public: LockedAccess(OptLock &CS, T& Obj) : m_Lock(CS.TakeLock()) , m_Obj(Obj) { } T* operator->() { return &m_Obj; } }; // Intended use: GetLocked()->member. // The LockedAccess temporary object ensures synchronization until the end of the expression. template COptLockedContainer(Args&&... args) : m_Obj(std::forward(args)...) { } LockedAccess GetLocked() { return LockedAccess(m_CS, m_Obj); } void InitLock() { m_CS.EnsureLock(); } }; enum ResourceInfoType { TiledPoolType, ResourceType }; struct ResourceInfo { union { struct { D3D12_HEAP_DESC m_HeapDesc; } TiledPool; struct { D3D12_RESOURCE_DESC m_ResourceDesc; D3D11_RESOURCE_FLAGS m_Flags11; D3D12_HEAP_FLAGS m_HeapFlags; D3D12_HEAP_PROPERTIES m_HeapProps; } Resource; }; ResourceInfoType m_Type; bool m_bShared; bool m_bNTHandle; bool m_bSynchronized; bool m_bAllocatedBy9on12; HANDLE m_GDIHandle; }; using RenameResourceSet = std::deque>; struct PresentSurface { PresentSurface() : m_pResource(nullptr), m_subresource(0) {} PresentSurface(Resource* pResource, UINT subresource = 0) : m_pResource(pResource), m_subresource(subresource) {} Resource* m_pResource; UINT m_subresource; }; struct PresentCBArgs { _In_ ID3D12CommandQueue* pGraphicsCommandQueue; _In_ ID3D12CommandList* pGraphicsCommandList; _In_reads_(numSrcSurfaces) const PresentSurface* pSrcSurfaces; UINT numSrcSurfaces; _In_opt_ Resource* pDest; UINT flipInterval; UINT vidPnSourceId; _In_ D3DKMT_PRESENT* pKMTPresent; }; class ImmediateContext { public: // D3D12 objects // TODO: const const unique_comptr m_pDevice12; #if DYNAMIC_LOAD_DXCORE XPlatHelpers::unique_module m_DXCore; #endif unique_comptr m_pDXCoreAdapter; unique_comptr m_pDXGIAdapter; unique_comptr m_pDevice12_1; unique_comptr m_pDevice12_2; // TODO: Instead of adding more next time, replace 
unique_comptr m_pCompatDevice; unique_comptr m_pSyncOnlyQueue; private: std::unique_ptr m_CommandLists[(UINT)COMMAND_LIST_TYPE::MAX_VALID]; // Residency Manager needs to come after the deferred deletion queue so that defer deleted objects can // call EndTrackingObject on a valid residency manager ResidencyManager m_residencyManager; // It is important that the deferred deletion queue manager gets destroyed last, place solely strict dependencies above. COptLockedContainer m_DeferredDeletionQueueManager; // Must be initialized before BindingTracker logic for m_CurrentState D3D_FEATURE_LEVEL m_FeatureLevel; static constexpr UINT64 m_MaxBufferPoolTrimThreshold = 100; static constexpr UINT64 m_MinBufferPoolTrimThreshold = 5; public: class CDisablePredication { public: CDisablePredication(ImmediateContext* pParent); ~CDisablePredication(); private: ImmediateContext* m_pParent; }; friend class Query; friend class CommandListManager; class CreationArgs { public: CreationArgs() { ZeroMemory(this, sizeof(*this)); BufferPoolTrimThreshold = m_MaxBufferPoolTrimThreshold; } UINT RequiresBufferOutOfBoundsHandling : 1; UINT CreatesAndDestroysAreMultithreaded : 1; UINT RenamingIsMultithreaded : 1; UINT UseThreadpoolForPSOCreates : 1; UINT UseRoundTripPSOs : 1; UINT UseResidencyManagement : 1; UINT DisableGPUTimeout : 1; UINT IsXbox : 1; UINT AdjustYUY2BlitCoords : 1; GUID CreatorID; DWORD MaxAllocatedUploadHeapSpacePerCommandList; DWORD MaxSRVHeapSize; DWORD BufferPoolTrimThreshold; }; ImmediateContext(UINT nodeIndex, D3D12_FEATURE_DATA_D3D12_OPTIONS& caps, ID3D12Device* pDevice, ID3D12CommandQueue* pQueue, TranslationLayerCallbacks const& callbacks, UINT64 debugFlags, CreationArgs args) noexcept(false); ~ImmediateContext() noexcept; #if TRANSLATION_LAYER_DBG UINT64 DebugFlags() { return m_DebugFlags; } #endif CreationArgs m_CreationArgs; bool RequiresBufferOutofBoundsHandling() { return m_CreationArgs.RequiresBufferOutOfBoundsHandling; } bool IsXbox() { return 
m_CreationArgs.IsXbox; } // Currently only accurate with D3D11. CommandListManager *GetCommandListManager(COMMAND_LIST_TYPE type) noexcept; ID3D12CommandList *GetCommandList(COMMAND_LIST_TYPE type) noexcept; UINT64 GetCommandListID(COMMAND_LIST_TYPE type) noexcept; UINT64 GetCommandListIDInterlockedRead(COMMAND_LIST_TYPE type) noexcept; UINT64 GetCommandListIDWithCommands(COMMAND_LIST_TYPE type) noexcept; UINT64 GetCompletedFenceValue(COMMAND_LIST_TYPE type) noexcept; ID3D12CommandQueue *GetCommandQueue(COMMAND_LIST_TYPE type) noexcept; void ResetCommandList(UINT commandListTypeMask) noexcept; void CloseCommandList(UINT commandListTypeMask) noexcept; HRESULT EnqueueSetEvent(UINT commandListTypeMask, HANDLE hEvent) noexcept; HRESULT EnqueueSetEvent(COMMAND_LIST_TYPE commandListType, HANDLE hEvent) noexcept; Fence *GetFence(COMMAND_LIST_TYPE type) noexcept; void SubmitCommandList(UINT commandListTypeMask); void SubmitCommandList(COMMAND_LIST_TYPE commandListType); // Returns true if synchronization was successful, false likely means device is removed bool WaitForCompletion(UINT commandListTypeMask) noexcept; bool WaitForCompletion(COMMAND_LIST_TYPE commandListType); bool WaitForFenceValue(COMMAND_LIST_TYPE commandListType, UINT64 FenceValue); bool WaitForFenceValue(COMMAND_LIST_TYPE type, UINT64 FenceValue, bool DoNotWait); ID3D12GraphicsCommandList *GetGraphicsCommandList() noexcept; ID3D12VideoDecodeCommandList2 *GetVideoDecodeCommandList() noexcept; ID3D12VideoProcessCommandList2 *GetVideoProcessCommandList() noexcept; void AdditionalCommandsAdded(COMMAND_LIST_TYPE type) noexcept; void UploadHeapSpaceAllocated(COMMAND_LIST_TYPE type, UINT64 HeapSize) noexcept; unique_comptr AllocateHeap(UINT64 HeapSize, UINT64 alignment, AllocatorHeapType heapType) noexcept(false); void InitializeVideo(ID3D12VideoDevice **ppVideoDevice); void AddObjectToResidencySet(Resource *pResource, COMMAND_LIST_TYPE commandListType); void AddResourceToDeferredDeletionQueue(ID3D12Object* 
pUnderlying, std::unique_ptr &&pResidencyHandle, const UINT64 lastCommandListIDs[(UINT)COMMAND_LIST_TYPE::MAX_VALID], bool completionRequired, std::vector deferredWaits); void AddObjectToDeferredDeletionQueue(ID3D12Object* pUnderlying, COMMAND_LIST_TYPE commandListType, UINT64 lastCommandListID, bool completionRequired); void AddObjectToDeferredDeletionQueue(ID3D12Object* pUnderlying, const UINT64 lastCommandListIDs[(UINT)COMMAND_LIST_TYPE::MAX_VALID], bool completionRequired); bool TrimDeletedObjects(bool deviceBeingDestroyed = false); bool TrimResourcePools(); unique_comptr AcquireTransitionableUploadBuffer(AllocatorHeapType HeapType, UINT64 Size) noexcept(false); void ReturnTransitionableBufferToPool(AllocatorHeapType HeapType, UINT64 Size, unique_comptr&&spResource, UINT64 FenceValue) noexcept; D3D12ResourceSuballocation AcquireSuballocatedHeapForResource(_In_ Resource* pResource, ResourceAllocationContext threadingContext) noexcept(false); D3D12ResourceSuballocation AcquireSuballocatedHeap(AllocatorHeapType HeapType, UINT64 Size, ResourceAllocationContext threadingContext, bool bCannotBeOffset = false) noexcept(false); void ReleaseSuballocatedHeap(AllocatorHeapType HeapType, D3D12ResourceSuballocation &resource, UINT64 FenceValue, COMMAND_LIST_TYPE commandListType) noexcept; void ReleaseSuballocatedHeap(AllocatorHeapType HeapType, D3D12ResourceSuballocation &resource, const UINT64 FenceValues[]) noexcept; void ReturnAllBuffersToPool( Resource& UnderlyingResource) noexcept; static void UploadDataToMappedBuffer(_In_reads_bytes_(Placement.Depth * DepthPitch) const void* pData, UINT SrcPitch, UINT SrcDepth, _Out_writes_bytes_(Placement.Depth * DepthPitch) void* pMappedData, D3D12_SUBRESOURCE_FOOTPRINT& Placement, UINT DepthPitch, UINT TightRowPitch) noexcept; // This is similar to the D3D12 header helper method, but it can handle 11on12-emulated resources, as well as a dst box enum class UpdateSubresourcesFlags { ScenarioImmediateContext, // Servicing an immediate 
context operation, e.g. UpdateSubresource API or some kind of clear ScenarioInitialData, // Servicing a free-threaded method, but guaranteed that the dest resource is idle ScenarioBatchedContext, // Servicing a queued operation, but may be occurring in parallel with immediate context operations ScenarioImmediateContextInternalOp, // Servicing an internal immediate context operation (e.g. updating UAV/SO counters) and should not respect predication ScenarioMask = 0x3, None = 0, ChannelSwapR10G10B10A2 = 0x4, }; void UpdateSubresources(Resource* pDst, D3D12TranslationLayer::CSubresourceSubset const& Subresources, _In_reads_opt_(_Inexpressible_(Subresources.NumNonExtendedSubresources())) const D3D11_SUBRESOURCE_DATA* pSrcData, _In_opt_ const D3D12_BOX* pDstBox = nullptr, UpdateSubresourcesFlags flags = UpdateSubresourcesFlags::ScenarioImmediateContext, _In_opt_ const void* pClearColor = nullptr ); struct PreparedUpdateSubresourcesOperation { UINT64 OffsetAdjustment; // 0-8 bytes EncodedResourceSuballocation EncodedBlock; // 8-32 bytes (last 4 bytes padding on x86) CSubresourceSubset EncodedSubresourceSubset; // 32-40 bytes UINT DstX; // 40-44 bytes UINT DstY; // 44-48 bytes UINT DstZ; // 48-52 bytes bool bDisablePredication; // byte 52 bool bDstBoxPresent; // byte 53 // 2 bytes padding }; static_assert(sizeof(PreparedUpdateSubresourcesOperation) == 56, "Math above is wrong. 
Check if padding can be removed."); struct PreparedUpdateSubresourcesOperationWithLocalPlacement { PreparedUpdateSubresourcesOperation Base; D3D12_PLACED_SUBRESOURCE_FOOTPRINT LocalPlacementDescs[2]; }; class CPrepareUpdateSubresourcesHelper { public: PreparedUpdateSubresourcesOperationWithLocalPlacement PreparedStorage; Resource& Dst; CSubresourceSubset const& Subresources; const bool bDeInterleavingUpload = Dst.SubresourceMultiplier() > 1; const UINT NumSrcData = Subresources.NumNonExtendedSubresources(); const UINT NumDstSubresources = Subresources.NumExtendedSubresources(); const UINT8 PlaneCount = (Dst.SubresourceMultiplier() * Dst.AppDesc()->NonOpaquePlaneCount()); const UINT FirstDstSubresource = ComposeSubresourceIdxExtended(Subresources.m_BeginMip, Subresources.m_BeginArray, Subresources.m_BeginPlane, Dst.AppDesc()->MipLevels(), Dst.AppDesc()->ArraySize()); const UINT LastDstSubresource = ComposeSubresourceIdxExtended(Subresources.m_EndMip - 1, Subresources.m_EndArray - 1, Subresources.m_EndPlane - 1, Dst.AppDesc()->MipLevels(), Dst.AppDesc()->ArraySize()); const bool bDisjointSubresources = LastDstSubresource - FirstDstSubresource + 1 != NumDstSubresources; const bool bDstBoxPresent; const bool bUseLocalPlacement = bDstBoxPresent || bDisjointSubresources; bool FinalizeNeeded = false; private: UINT64 TotalSize = 0; D3D12ResourceSuballocation mappableResource; UINT bufferOffset = 0; bool CachedNeedsTemporaryUploadHeap = false; public: CPrepareUpdateSubresourcesHelper(Resource& Dst, CSubresourceSubset const& Subresources, const D3D11_SUBRESOURCE_DATA* pSrcData, const D3D12_BOX* pDstBox, UpdateSubresourcesFlags flags, const void* pClearPattern, UINT ClearPatternSize, ImmediateContext& ImmCtx); private: #if TRANSLATION_LAYER_DBG void AssertPreconditions(const D3D11_SUBRESOURCE_DATA* pSrcData, const void* pClearPattern); #endif bool InitializePlacementsAndCalculateSize(const D3D12_BOX* pDstBox, ImmediateContext& ImmCtx); bool 
NeedToRespectPredication(UpdateSubresourcesFlags flags) const; bool NeedTemporaryUploadHeap(UpdateSubresourcesFlags flags, ImmediateContext& ImmCtx) const; void InitializeMappableResource(UpdateSubresourcesFlags flags, ImmediateContext& ImmCtx, D3D12_BOX const* pDstBox); void UploadSourceDataToMappableResource(void* pDstData, D3D11_SUBRESOURCE_DATA const* pSrcData, ImmediateContext& ImmCtx, UpdateSubresourcesFlags flags); void UploadDataToMappableResource(D3D11_SUBRESOURCE_DATA const* pSrcData, ImmediateContext& ImmCtx, D3D12_BOX const* pDstBox, const void* pClearPattern, UINT ClearPatternSize, UpdateSubresourcesFlags flags); void WriteOutputParameters(D3D12_BOX const* pDstBox, UpdateSubresourcesFlags flags); }; void FinalizeUpdateSubresources(Resource* pDst, PreparedUpdateSubresourcesOperation const& PreparedStorage, _In_reads_opt_(2) D3D12_PLACED_SUBRESOURCE_FOOTPRINT const* LocalPlacementDescs); void CopyAndConvertSubresourceRegion(Resource* pDst, UINT DstSubresource, Resource* pSrc, UINT SrcSubresource, UINT dstX, UINT dstY, UINT dstZ, const D3D12_BOX* pSrcBox) noexcept; bool CreatesAndDestroysAreMultithreaded() const noexcept { return m_CreationArgs.CreatesAndDestroysAreMultithreaded; } void UAVBarrier() noexcept; // Mark resources and add them to the transition list // Subresource binding states are assumed to already be changed using wrappers below void TransitionResourceForBindings(Resource* pResource) noexcept; void TransitionResourceForBindings(ViewBase* pView) noexcept; static void ConstantBufferBound(Resource* pBuffer, UINT slot, EShaderStage stage) noexcept; static void ConstantBufferUnbound(Resource* pBuffer, UINT slot, EShaderStage stage) noexcept; static void VertexBufferBound(Resource* pBuffer, UINT slot) noexcept; static void VertexBufferUnbound(Resource* pBuffer, UINT slot) noexcept; static void IndexBufferBound(Resource* pBuffer) noexcept; static void IndexBufferUnbound(Resource* pBuffer) noexcept; static void StreamOutputBufferBound(Resource* 
pBuffer, UINT slot) noexcept; static void StreamOutputBufferUnbound(Resource* pBuffer, UINT slot) noexcept; void ClearDSVBinding(); void ClearRTVBinding(UINT slot); void ClearVBBinding(UINT slot); void SetPredicationInternal(Query*, BOOL); void WriteToSubresource(Resource* DstResource, UINT DstSubresource, _In_opt_ const D3D11_BOX* pDstBox, const void* pSrcData, UINT SrcRowPitch, UINT SrcDepthPitch); void ReadFromSubresource(void* pDstData, UINT DstRowPitch, UINT DstDepthPitch, Resource* SrcResource, UINT SrcSubresource, _In_opt_ const D3D11_BOX* pSrcBox); ResourceCache &GetResourceCache() { return m_ResourceCache; } public: PipelineState* GetPipelineState(); void TRANSLATION_API SetPipelineState(PipelineState* pPipeline); void TRANSLATION_API Draw(UINT, UINT ); void TRANSLATION_API DrawInstanced(UINT, UINT, UINT, UINT); void TRANSLATION_API DrawIndexed(UINT, UINT, INT ); void TRANSLATION_API DrawIndexedInstanced(UINT, UINT, UINT, INT, UINT ); void TRANSLATION_API DrawAuto(); void TRANSLATION_API DrawIndexedInstancedIndirect(Resource*, UINT ); void TRANSLATION_API DrawInstancedIndirect(Resource*, UINT ); void TRANSLATION_API Dispatch( UINT, UINT, UINT ); void TRANSLATION_API DispatchIndirect(Resource*, UINT ); // Returns if any work was actually submitted bool TRANSLATION_API Flush(UINT commandListMask); void TRANSLATION_API IaSetTopology(D3D12_PRIMITIVE_TOPOLOGY); void TRANSLATION_API IaSetVertexBuffers( UINT, __in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT) UINT, Resource* const*, const UINT*, const UINT* ); void TRANSLATION_API IaSetIndexBuffer( Resource*, DXGI_FORMAT, UINT ); template void TRANSLATION_API SetShaderResources( UINT, __in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT) UINT, SRV* const* ); template void TRANSLATION_API SetSamplers( UINT, __in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT) UINT, Sampler* const* ); template void TRANSLATION_API SetConstantBuffers( UINT, __in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_HW_SLOT_COUNT) 
UINT Buffers, Resource* const*, __in_ecount_opt(Buffers) CONST UINT* pFirstConstant, __in_ecount_opt(Buffers) CONST UINT* pNumConstants); void TRANSLATION_API SoSetTargets(_In_range_(0, 4) UINT NumTargets, _In_range_(0, 4) UINT, _In_reads_(NumTargets) Resource* const*, _In_reads_(NumTargets) const UINT* ); void TRANSLATION_API OMSetRenderTargets(__in_ecount(NumRTVs) RTV* const* pRTVs, __in_range(0, 8) UINT NumRTVs, __in_opt DSV *); void TRANSLATION_API OMSetUnorderedAccessViews(UINT, __in_range(0, D3D11_1_UAV_SLOT_COUNT) UINT NumViews, __in_ecount(NumViews) UAV* const*, __in_ecount(NumViews) CONST UINT* ); void TRANSLATION_API CsSetUnorderedAccessViews(UINT, __in_range(0, D3D11_1_UAV_SLOT_COUNT) UINT NumViews, __in_ecount(NumViews) UAV* const*, __in_ecount(NumViews) CONST UINT* ); void TRANSLATION_API OMSetStencilRef( UINT ); void TRANSLATION_API OMSetBlendFactor(const FLOAT[4]); void TRANSLATION_API SetViewport(UINT slot, const D3D12_VIEWPORT*); void TRANSLATION_API SetNumViewports(UINT num); void TRANSLATION_API SetScissorRect(UINT slot, const D3D12_RECT*); void TRANSLATION_API SetNumScissorRects(UINT num); void TRANSLATION_API SetScissorRectEnable(BOOL); void TRANSLATION_API ClearRenderTargetView(RTV *, CONST FLOAT[4], UINT NumRects, const D3D12_RECT *pRects); void TRANSLATION_API ClearDepthStencilView(DSV *, UINT, FLOAT, UINT8, UINT NumRects, const D3D12_RECT *pRects); void TRANSLATION_API ClearUnorderedAccessViewUint(UAV *, CONST UINT[4], UINT NumRects, const D3D12_RECT *pRects); void TRANSLATION_API ClearUnorderedAccessViewFloat(UAV *, CONST FLOAT[4], UINT NumRects, const D3D12_RECT *pRects); void TRANSLATION_API ClearResourceWithNoRenderTarget(Resource* pResource, CONST FLOAT[4], UINT NumRects, const D3D12_RECT *pRects, UINT Subresource, UINT BaseSubresource, DXGI_FORMAT ClearFormat); void TRANSLATION_API ClearVideoDecoderOutputView(VDOV *, CONST FLOAT[4], UINT NumRects, const D3D12_RECT *pRects); void TRANSLATION_API ClearVideoProcessorInputView(VPIV *, 
CONST FLOAT[4], UINT NumRects, const D3D12_RECT *pRects); void TRANSLATION_API ClearVideoProcessorOutputView(VPOV *, CONST FLOAT[4], UINT NumRects, const D3D12_RECT *pRects); void TRANSLATION_API DiscardView(ViewBase* pView, const D3D12_RECT*, UINT); void TRANSLATION_API DiscardResource(Resource* pResource, const D3D12_RECT*, UINT); void TRANSLATION_API GenMips(SRV *, D3D12_FILTER_TYPE FilterType); void TRANSLATION_API QueryBegin( Async* ); void TRANSLATION_API QueryEnd(Async*); bool TRANSLATION_API QueryGetData(Async*, void*, UINT, bool DoNotFlush, bool AsyncGetData = false); void TRANSLATION_API SetPredication(Query*, BOOL); bool TRANSLATION_API Map(_In_ Resource* pResource, _In_ UINT Subresource, _In_ MAP_TYPE MapType, _In_ bool DoNotWait, _In_opt_ const D3D12_BOX *pReadWriteRange, _Out_ MappedSubresource* pMappedSubresource); void TRANSLATION_API Unmap(Resource*, UINT, MAP_TYPE, _In_opt_ const D3D12_BOX *pReadWriteRange); bool SynchronizeForMap(Resource* pResource, UINT Subresource, MAP_TYPE MapType, bool DoNotWait); bool TRANSLATION_API MapUnderlying(Resource*, UINT, MAP_TYPE, _In_opt_ const D3D12_BOX *pReadWriteRange, MappedSubresource* ); bool TRANSLATION_API MapUnderlyingSynchronize(Resource*, UINT, MAP_TYPE, bool, _In_opt_ const D3D12_BOX *pReadWriteRange, MappedSubresource* ); bool TRANSLATION_API MapDiscardBuffer( Resource* pResource, UINT Subresource, MAP_TYPE, bool, _In_opt_ const D3D12_BOX *pReadWriteRange, MappedSubresource* ); bool TRANSLATION_API MapDynamicTexture( Resource* pResource, UINT Subresource, MAP_TYPE, bool, _In_opt_ const D3D12_BOX *pReadWriteRange, MappedSubresource* ); bool TRANSLATION_API MapDefault(Resource*pResource, UINT Subresource, MAP_TYPE, bool doNotWait, _In_opt_ const D3D12_BOX *pReadWriteRange, MappedSubresource*); void TRANSLATION_API UnmapDefault( Resource* pResource, UINT Subresource, _In_opt_ const D3D12_BOX *pReadWriteRange); void TRANSLATION_API UnmapUnderlyingSimple( Resource* pResource, UINT Subresource, _In_opt_ 
const D3D12_BOX *pReadWriteRange); void TRANSLATION_API UnmapUnderlyingStaging( Resource* pResource, UINT Subresource, _In_opt_ const D3D12_BOX *pReadWriteRange); void TRANSLATION_API UnmapDynamicTexture( Resource*pResource, UINT Subresource, _In_opt_ const D3D12_BOX *pReadWriteRange, bool bUploadMappedContents); Resource* TRANSLATION_API CreateRenameCookie(Resource* pResource, ResourceAllocationContext threadingContext); void TRANSLATION_API Rename(Resource* pResource, Resource* pRenameResource); void TRANSLATION_API RenameViaCopy(Resource* pResource, Resource* pRenameResource, UINT DirtyPlaneMask); void TRANSLATION_API DeleteRenameCookie(Resource* pRenameResource); void TRANSLATION_API ClearInputBindings(Resource* pResource); void TRANSLATION_API ClearOutputBindings(Resource* pResource); void TRANSLATION_API ResourceCopy( Resource*, Resource* ); void TRANSLATION_API ResourceResolveSubresource( Resource*, UINT, Resource*, UINT, DXGI_FORMAT ); void TRANSLATION_API SetResourceMinLOD( Resource*, FLOAT ); void TRANSLATION_API CopyStructureCount( Resource*, UINT, UAV* ); void TRANSLATION_API ResourceCopyRegion( Resource*, UINT, UINT, UINT, UINT, Resource*, UINT, const D3D12_BOX*); void TRANSLATION_API ResourceUpdateSubresourceUP( Resource*, UINT, _In_opt_ const D3D12_BOX*, _In_ const VOID*, UINT, UINT); HRESULT TRANSLATION_API GetDeviceState(); enum TILE_MAPPING_FLAG { TILE_MAPPING_NO_OVERWRITE = 0x00000001, }; enum TILE_RANGE_FLAG { TILE_RANGE_NULL = 0x00000001, TILE_RANGE_SKIP = 0x00000002, TILE_RANGE_REUSE_SINGLE_TILE = 0x00000004, }; enum TILE_COPY_FLAG { TILE_COPY_NO_OVERWRITE = 0x00000001, TILE_COPY_LINEAR_BUFFER_TO_SWIZZLED_TILED_RESOURCE = 0x00000002, TILE_COPY_SWIZZLED_TILED_RESOURCE_TO_LINEAR_BUFFER = 0x00000004, }; void TRANSLATION_API UpdateTileMappings(Resource* hTiledResource, UINT NumTiledResourceRegions,_In_reads_(NumTiledResourceRegions) const D3D12_TILED_RESOURCE_COORDINATE* pTiledResourceRegionStartCoords,_In_reads_opt_(NumTiledResourceRegions) const 
D3D12_TILE_REGION_SIZE* pTiledResourceRegionSizes, Resource* hTilePool,UINT NumRanges,_In_reads_opt_(NumRanges) const TILE_RANGE_FLAG* pRangeFlags, _In_reads_opt_(NumRanges) const UINT* pTilePoolStartOffsets, _In_reads_opt_(NumRanges) const UINT* pRangeTileCounts, TILE_MAPPING_FLAG Flags ); void TRANSLATION_API CopyTileMappings(Resource*, _In_ const D3D12_TILED_RESOURCE_COORDINATE*, Resource*, _In_ const D3D12_TILED_RESOURCE_COORDINATE*, _In_ const D3D12_TILE_REGION_SIZE*, TILE_MAPPING_FLAG); void TRANSLATION_API CopyTiles(Resource*, _In_ const D3D12_TILED_RESOURCE_COORDINATE*, _In_ const D3D12_TILE_REGION_SIZE*, Resource*, UINT64, TILE_COPY_FLAG); void TRANSLATION_API UpdateTiles(Resource*, _In_ const D3D12_TILED_RESOURCE_COORDINATE*, _In_ const D3D12_TILE_REGION_SIZE*, const _In_ VOID*, UINT); void TRANSLATION_API TiledResourceBarrier(Resource* pBefore, Resource* pAfter); void TRANSLATION_API ResizeTilePool(Resource*, UINT64); void TRANSLATION_API GetMipPacking(Resource*, _Out_ UINT*, _Out_ UINT*); HRESULT TRANSLATION_API CheckFormatSupport(_Out_ D3D12_FEATURE_DATA_FORMAT_SUPPORT& formatData); bool SupportsRenderTarget(DXGI_FORMAT Format); void TRANSLATION_API CheckMultisampleQualityLevels(DXGI_FORMAT, UINT, D3D12_MULTISAMPLE_QUALITY_LEVEL_FLAGS, _Out_ UINT*); void TRANSLATION_API CheckFeatureSupport(D3D12_FEATURE Feature, _Inout_updates_bytes_(FeatureSupportDataSize)void* pFeatureSupportData, UINT FeatureSupportDataSize); void TRANSLATION_API SetHardwareProtection(Resource*, INT); void TRANSLATION_API SetHardwareProtectionState(BOOL); void TRANSLATION_API Signal(_In_ Fence* pFence, UINT64 Value); void TRANSLATION_API Wait(std::shared_ptr const& pFence, UINT64 Value); void TRANSLATION_API RotateResourceIdentities(Resource* const* ppResources, UINT Resources); HRESULT TRANSLATION_API ResolveSharedResource(Resource* pResource); void TRANSLATION_API ClearState(); void TRANSLATION_API SetMarker(const wchar_t* name); void TRANSLATION_API BeginEvent(const wchar_t* 
name); void TRANSLATION_API EndEvent(); void TRANSLATION_API SharingContractPresent(_In_ Resource* pResource); void TRANSLATION_API Present( _In_reads_(numSrcSurfaces) PresentSurface const* pSrcSurfaces, UINT numSrcSurfaces, _In_opt_ Resource* pDest, UINT flipInterval, UINT vidPnSourceId, _In_ D3DKMT_PRESENT* pKMTPresent, bool bDoNotSequence, std::function pfnPresentCb); HRESULT TRANSLATION_API CloseAndSubmitGraphicsCommandListForPresent( BOOL commandsAdded, _In_reads_(numSrcSurfaces) const PresentSurface* pSrcSurfaces, UINT numSrcSurfaces, _In_opt_ Resource* pDest, _In_ D3DKMT_PRESENT* pKMTPresent); public: void TRANSLATION_API GetSharedGDIHandle(_In_ Resource *pResource, _Out_ HANDLE *pHandle); void TRANSLATION_API CreateSharedNTHandle(_In_ Resource *pResource, _Out_ HANDLE *pHandle, _In_opt_ SECURITY_ATTRIBUTES *pSA = nullptr); bool ResourceAllocationFallback(ResourceAllocationContext threadingContext); template auto TryAllocateResourceWithFallback(TFunc&& allocateFunc, ResourceAllocationContext threadingContext) { while (true) { try { return allocateFunc(); } catch( _com_error& hrEx ) { if (hrEx.Error() != E_OUTOFMEMORY || !ResourceAllocationFallback(threadingContext)) { throw; } } } } public: // Type // Note: all interfaces in these structs have weak refs // Bindings are remembered separate from immediate context to compute diff for state transitions struct SStageState { SStageState() noexcept(false) = default; void ClearState(EShaderStage stage) noexcept; // Shader-declared bindings do not set pipeline dirty bits at bind time, only slot dirty bits // These slot dirty bits are only interesting if they are below the maximum shader-declared slot, // as determined during pre-draw/dispatch based on the bound shaders CViewBoundState m_SRVs; CConstantBufferBoundState m_CBs; CSamplerBoundState m_Samplers; // Slots for re-asserting state on a new command list D3D12_GPU_DESCRIPTOR_HANDLE m_SRVTableBase{ 0 }; D3D12_GPU_DESCRIPTOR_HANDLE m_CBTableBase{ 0 }; 
D3D12_GPU_DESCRIPTOR_HANDLE m_SamplerTableBase{ 0 }; UINT m_uConstantBufferOffsets[D3D11_COMMONSHADER_CONSTANT_BUFFER_HW_SLOT_COUNT] = {}; UINT m_uConstantBufferCounts[D3D11_COMMONSHADER_CONSTANT_BUFFER_HW_SLOT_COUNT] = {}; }; struct SState { SState() noexcept(false) = default; void ClearState() noexcept; PipelineState* m_pPSO = nullptr; RootSignature* m_pLastGraphicsRootSig = nullptr, *m_pLastComputeRootSig = nullptr; Query* m_pPredicate = nullptr; CViewBoundState m_UAVs, m_CSUAVs; CSimpleBoundState m_RTVs; CSimpleBoundState m_DSVs; CSimpleBoundState m_VBs; CSimpleBoundState m_SO; CSimpleBoundState m_IB; UINT m_LastVBCount = 0; // Slots for re-asserting state on a new command list D3D12_GPU_DESCRIPTOR_HANDLE m_UAVTableBase{ 0 }, m_CSUAVTableBase{ 0 }; SStageState& GetStageState(EShaderStage) noexcept; SStageState m_PS, m_VS, m_GS, m_HS, m_DS, m_CS; }; void PreRender(COMMAND_LIST_TYPE type) noexcept; // draw, dispatch, clear, copy, etc void PostRender(COMMAND_LIST_TYPE type, UINT64 ReassertBitsToAdd = 0); D3D12_BOX GetBoxFromResource(Resource *pSrc, UINT SrcSubresource); D3D12_BOX GetSubresourceBoxFromBox(Resource* pSrc, UINT RequestedSubresource, UINT BaseSubresource, D3D12_BOX const& SrcBox); class BltResolveManager { D3D12TranslationLayer::ImmediateContext& m_ImmCtx; std::map> m_Temps; public: BltResolveManager(D3D12TranslationLayer::ImmediateContext& ImmCtx); Resource* GetBltResolveTempForWindow(HWND hwnd, Resource& presentingResource); } m_BltResolveManager; private: // methods void PreDraw() noexcept(false); void PreDispatch() noexcept(false); // The app should inform the translation layer when a frame has been finished // to hint when trimming work should start // // The translation layer makes guesses at frame ends (i.e. when flush is called) // but isn't aware when a present is done. 
void TRANSLATION_API PostSubmitNotification();

void PostDraw();
void PostDispatch();

void SameResourceCopy(Resource *pSrc, UINT SrcSubresource, Resource *pDst, UINT DstSubresource, UINT dstX, UINT dstY, UINT dstZ, const D3D12_BOX *pSrcBox);

public:
void PostCopy(Resource *pSrc, UINT startSubresource, Resource *pDest, UINT dstSubresource, UINT totalNumSubresources);
void PostUpload();

void CopyDataToBuffer(
    ID3D12Resource* pResource,
    UINT Offset,
    const void* pData,
    UINT Size
    ) noexcept(false);

bool HasCommands(COMMAND_LIST_TYPE type) noexcept;
UINT GetCommandListTypeMaskForQuery(EQueryType query) noexcept;

void PrepForCommandQueueSync(UINT commandListTypeMask);

RootSignature* CreateOrRetrieveRootSignature(RootSignatureDesc const& desc) noexcept(false);

private:
bool Shutdown() noexcept;

template UINT CalculateViewSlotsForBindings() noexcept;
template UINT CalculateSamplerSlotsForBindings() noexcept;

// Mark used in command list, copy to descriptor heap, and bind table
template void DirtyShaderResourcesHelper(UINT& HeapSlot) noexcept;
template void DirtyConstantBuffersHelper(UINT& HeapSlot) noexcept;
template void DirtySamplersHelper(UINT& HeapSlot) noexcept;

// Mark used in command list and bind table (descriptors already in heap)
template void ApplyShaderResourcesHelper() noexcept;
template void ApplyConstantBuffersHelper() noexcept;
template void ApplySamplersHelper() noexcept;

void SetScissorRectsHelper() noexcept;
void RefreshNonHeapBindings(UINT64 DirtyBits) noexcept;
ID3D12PipelineState* PrepareGenerateMipsObjects(DXGI_FORMAT Format, D3D12_RESOURCE_DIMENSION Dimension) noexcept(false);
void EnsureInternalUAVRootSig() noexcept(false);
void EnsureDrawAutoResources() noexcept(false);
void EnsureQueryResources() noexcept(false);
void EnsureExecuteIndirectResources() noexcept(false);

// Length of the Constants array taken by FormatBuffer.
static const UINT NUM_UAV_ROOT_SIG_CONSTANTS = 2;
void FormatBuffer(ID3D12Resource* pBuffer, ID3D12PipelineState* pPSO, UINT FirstElement, UINT NumElements, const UINT Constants[NUM_UAV_ROOT_SIG_CONSTANTS] ) noexcept(false);

// Helper for views
void TransitionResourceForView(ViewBase* pView, D3D12_RESOURCE_STATES desiredState) noexcept;
template void ClearViewWithNoRenderTarget(View* pView, CONST FLOAT[4], UINT NumRects, const D3D12_RECT *pRects);

UINT GetCurrentCommandListTypeMask() noexcept;

void InsertUAVBarriersIfNeeded(CViewBoundState& UAVBindings, UINT NumUAVs) noexcept;

public: // Methods
// Mask with only the bit for this context's node index set.
UINT GetNodeMask() const noexcept
{
    return 1 << m_nodeIndex;
}

UINT GetNodeIndex() const noexcept
{
    return m_nodeIndex;
}

// Heap properties for the given heap type on this node.
// A compute-only context builds them directly with CD3DX12_HEAP_PROPERTIES;
// otherwise the D3D12 device is asked for its custom heap properties.
D3D12_HEAP_PROPERTIES GetHeapProperties(D3D12_HEAP_TYPE Type) const noexcept
{
    if (ComputeOnly())
    {
        return CD3DX12_HEAP_PROPERTIES(Type, GetNodeMask(), GetNodeMask());
    }
    else
    {
        return m_pDevice12->GetCustomHeapProperties(GetNodeMask(), Type);
    }
}

const D3D12_FEATURE_DATA_D3D12_OPTIONS& GetCaps() { return m_caps; }
// True when the context's feature level is D3D_FEATURE_LEVEL_1_0_CORE.
bool ComputeOnly() const {return !!(FeatureLevel() == D3D_FEATURE_LEVEL_1_0_CORE);}

public: // variables

// D3D11 objects
UINT m_uStencilRef;
float m_BlendFactor[4];
D3D12_PRIMITIVE_TOPOLOGY m_PrimitiveTopology;
BOOL m_PredicateValue;
UINT m_auVertexOffsets[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
UINT m_auVertexStrides[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
DXGI_FORMAT m_IndexBufferFormat;
UINT m_uIndexBufferOffset;
UINT m_uNumScissors;
D3D12_RECT m_aScissors[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
UINT m_uNumViewports;
D3D12_VIEWPORT m_aViewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE];
BOOL m_ScissorRectEnable;

std::unordered_map> m_RootSignatures;
std::unique_ptr m_spPSOCompilationThreadPool;

// "Online" descriptor heaps
struct OnlineDescriptorHeap
{
    unique_comptr m_pDescriptorHeap;
    decltype(D3D12_GPU_DESCRIPTOR_HANDLE::ptr) m_DescriptorHeapBase;
    decltype(D3D12_CPU_DESCRIPTOR_HANDLE::ptr) m_DescriptorHeapBaseCPU;

    D3D12_DESCRIPTOR_HEAP_DESC m_Desc;
    UINT m_DescriptorSize;
    UINT m_BitsToSetOnNewHeap = 0;
    UINT m_MaxHeapSize;

    CFencedRingBuffer m_DescriptorRingBuffer;

    CFencePool< unique_comptr > m_HeapPool;

    // CPU handle of descriptor `slot`: CPU heap base plus slot * descriptor size.
    // Asserts that slot is below m_Desc.NumDescriptors.
    inline D3D12_CPU_DESCRIPTOR_HANDLE CPUHandle(UINT slot) {
        assert(slot < m_Desc.NumDescriptors);
        return { m_DescriptorHeapBaseCPU + slot * m_DescriptorSize };
    }
    // GPU handle of descriptor `slot`: GPU heap base plus slot * descriptor size.
    // Asserts that slot is below m_Desc.NumDescriptors.
    inline D3D12_GPU_DESCRIPTOR_HANDLE GPUHandle(UINT slot) {
        assert(slot < m_Desc.NumDescriptors);
        return { m_DescriptorHeapBase + slot * m_DescriptorSize };
    }
} m_ViewHeap, m_SamplerHeap;

void RollOverHeap(OnlineDescriptorHeap& Heap) noexcept(false);
UINT ReserveSlotsForBindings(OnlineDescriptorHeap& Heap, UINT (ImmediateContext::*pfnCalcRequiredSlots)()) noexcept(false);
UINT ReserveSlots(OnlineDescriptorHeap& Heap, UINT NumSlots) noexcept(false);

// Null descriptors; the SRV and UAV arrays have one entry per RESOURCE_DIMENSION value up to TEXTURECUBEARRAY.
D3D12_CPU_DESCRIPTOR_HANDLE m_NullSRVs[(UINT)RESOURCE_DIMENSION::TEXTURECUBEARRAY+1];
D3D12_CPU_DESCRIPTOR_HANDLE m_NullUAVs[(UINT)RESOURCE_DIMENSION::TEXTURECUBEARRAY+1];
D3D12_CPU_DESCRIPTOR_HANDLE m_NullRTV;
D3D12_CPU_DESCRIPTOR_HANDLE m_NullSampler;
TDeclVector m_UAVDeclScratch;

// Offline descriptor heaps
CDescriptorHeapManager m_SRVAllocator;
CDescriptorHeapManager m_UAVAllocator;
CDescriptorHeapManager m_RTVAllocator;
CDescriptorHeapManager m_DSVAllocator;
CDescriptorHeapManager m_SamplerAllocator;

ResourceCache m_ResourceCache;
std::vector m_RectCache;

// UAV barriers are not managed by the state manager.
// The state manager deals with changes in state, where UAV barriers need to be inserted
// in steady-state scenarios.
std::vector m_vUAVBarriers;

// Objects for GenerateMips
typedef std::tuple MipGenKey;
std::map> m_pGenerateMipsPSOMap;
InternalRootSignature m_GenerateMipsRootSig;
// Slot order used with m_GenerateMipsRootSig: SRV = 0, root constants = 1, sampler = 2.
enum GenerateMipsRootSignatureSlots
{
    eSRV = 0,
    eRootConstants,
    eSampler,
};
static const UINT NUM_FILTER_TYPES = 2;
D3D12_CPU_DESCRIPTOR_HANDLE m_GenerateMipsSamplers[NUM_FILTER_TYPES];

BlitHelper m_BlitHelper{ this };

// Returns the offline descriptor allocator for a view type; the explicit specializations
// below return the SRV, UAV, RTV and DSV allocators respectively.
template CDescriptorHeapManager& GetViewAllocator();
template<> CDescriptorHeapManager& GetViewAllocator() { return m_SRVAllocator; }
template<> CDescriptorHeapManager& GetViewAllocator() { return m_UAVAllocator; }
template<> CDescriptorHeapManager& GetViewAllocator() { return m_RTVAllocator; }
template<> CDescriptorHeapManager& GetViewAllocator() { return m_DSVAllocator; }

InternalRootSignature m_InternalUAVRootSig;
unique_comptr m_pDrawAutoPSO;
unique_comptr m_pFormatQueryPSO;
unique_comptr m_pAccumulateQueryPSO;

unique_comptr m_pDrawInstancedCommandSignature;
unique_comptr m_pDrawIndexedInstancedCommandSignature;
unique_comptr m_pDispatchCommandSignature;

LIST_ENTRY m_ActiveQueryList;

D3D_FEATURE_LEVEL FeatureLevel() const { return m_FeatureLevel; }

static DXGI_FORMAT GetParentForFormat(DXGI_FORMAT format);

bool UseRoundTripPSOs()
{
    return m_CreationArgs.UseRoundTripPSOs;
}

TranslationLayerCallbacks const& GetUpperlayerCallbacks() { return m_callbacks; }

ResidencyManager &GetResidencyManager() { return m_residencyManager; }
ResourceStateManager& GetResourceStateManager() { return m_ResourceStateManager; }

MaxFrameLatencyHelper m_MaxFrameLatencyHelper;

private: // variables
ResourceStateManager m_ResourceStateManager;
#if TRANSLATION_LAYER_DBG
UINT64 m_DebugFlags;
#endif

unique_comptr m_pStagingTexture;
unique_comptr m_pStagingBuffer;

private: // Dynamic/staging resource pools
TDynamicBufferPool m_UploadBufferPool;
TDynamicBufferPool m_ReadbackBufferPool;
TDynamicBufferPool m_DecoderBufferPool;
// Maps a heap type to its dynamic buffer pool.
// An unrecognized heap type asserts; with asserts compiled out the upload pool is returned.
TDynamicBufferPool& GetBufferPool(AllocatorHeapType HeapType)
{
    switch (HeapType)
    {
    case AllocatorHeapType::Upload:
        return m_UploadBufferPool;
    case AllocatorHeapType::Readback:
        return m_ReadbackBufferPool;
    case AllocatorHeapType::Decoder:
        return m_DecoderBufferPool;
    default:
        assert(false);
    }
    return m_UploadBufferPool;
}

// This is the maximum amount of memory the buddy allocator can use. Picking an arbitrarily high
// cap that allows this to pass tests that can potentially spend the whole GPU's memory on
// suballocated heaps
// (32 * 1024 * 1024 * 1024 bytes = 32 GiB.)
static constexpr UINT64 cBuddyMaxBlockSize = 32ll * 1024ll * 1024ll * 1024ll;

// A resource gets its own allocation when it is larger than cBuddyAllocatorThreshold
// or when the caller states it cannot be placed at an offset.
static bool ResourceNeedsOwnAllocation(UINT64 size, bool cannotBeOffset)
{
    return size > cBuddyAllocatorThreshold || cannotBeOffset;
}

// These suballocate out of larger heaps. This should not
// be used for resources that require transitions since transitions
// can only be done on the entire heap, not just the suballocated range
ConditionalHeapAllocator m_UploadHeapSuballocator;
ConditionalHeapAllocator m_ReadbackHeapSuballocator;
ConditionalHeapAllocator m_DecoderHeapSuballocator;
// Maps a heap type to its suballocator.
// An unrecognized heap type asserts; with asserts compiled out the upload suballocator is returned.
ConditionalHeapAllocator& GetAllocator(AllocatorHeapType HeapType)
{
    switch (HeapType)
    {
    case AllocatorHeapType::Upload:
        return m_UploadHeapSuballocator;
    case AllocatorHeapType::Readback:
        return m_ReadbackHeapSuballocator;
    case AllocatorHeapType::Decoder:
        return m_DecoderHeapSuballocator;
    default:
        assert(false);
    }
    return m_UploadHeapSuballocator;
}

COptLockedContainer m_RenamesInFlight;

private: // State tracking
// Dirty states are marked during sets and converted to command list operations at draw time, to avoid multiple costly conversions due to 11/12 API differences
UINT64 m_DirtyStates;

// Set to be all states during Flush, bits are cleared as individual sets come in, and all remaining bits are re-asserted on new command lists at draw time
UINT64 m_StatesToReassert;

SState m_CurrentState;

UINT m_nodeIndex;
D3D12_FEATURE_DATA_D3D12_OPTIONS m_caps;
const TranslationLayerCallbacks m_callbacks;

private:
// Returns true when, after clearing the UNKNOWN bit, the mask has at most one bit set.
// (x & (x - 1)) is zero exactly when x is zero or a power of two, so an empty mask also yields true.
static inline bool IsSingleCommandListType(UINT commandListTypeMask)
{
    commandListTypeMask &= ~COMMAND_LIST_TYPE_UNKNOWN_MASK; // ignore UNKNOWN type
    return commandListTypeMask & (commandListTypeMask - 1) ? false : true;
}

void DiscardViewImpl(COMMAND_LIST_TYPE commandListType, ViewBase* pView, const D3D12_RECT*, UINT, bool allSubresourcesSame);
void DiscardResourceImpl(COMMAND_LIST_TYPE commandListType, Resource* pResource, const D3D12_RECT* pRects, UINT NumRects, bool allSubresourcesSame);
void UpdateTileMappingsImpl(COMMAND_LIST_TYPE commandListType, Resource* pResource, UINT NumTiledResourceRegions, _In_reads_(NumTiledResourceRegions) const D3D12_TILED_RESOURCE_COORDINATE* pTiledResourceRegionStartCoords,
    _In_reads_opt_(NumTiledResourceRegions) const D3D12_TILE_REGION_SIZE* pTiledResourceRegionSizes, Resource* pTilePool, UINT NumRanges, _In_reads_opt_(NumRanges) const TILE_RANGE_FLAG* pRangeFlags,
    _In_reads_opt_(NumRanges) const UINT* pTilePoolStartOffsets, _In_reads_opt_(NumRanges) const UINT* pRangeTileCounts, TILE_MAPPING_FLAG Flags, bool NeedToSubmit);
void CopyTileMappingsImpl(COMMAND_LIST_TYPE commandListType, Resource* pDstTiledResource, _In_ const D3D12_TILED_RESOURCE_COORDINATE* pDstStartCoords,
    Resource* pSrcTiledResource, _In_ const D3D12_TILED_RESOURCE_COORDINATE* pSrcStartCoords, _In_ const D3D12_TILE_REGION_SIZE* pTileRegion, TILE_MAPPING_FLAG Flags);
void TiledResourceBarrierImpl(COMMAND_LIST_TYPE commandListType, Resource* pBefore, Resource* pAfter);

COMMAND_LIST_TYPE GetFallbackCommandListType(UINT commandListTypeMask);

// Device wide scratch space allocation for use in synchronous ops.
// Only grows. Free with device.
struct { BYTE* GetBuffer(UINT minSize) { if (minSize > m_Size) { m_spScratchBuffer = std::make_unique(minSize); m_Size = minSize; } return m_spScratchBuffer.get(); } private: std::unique_ptr m_spScratchBuffer; UINT m_Size = 0; } m_SyncronousOpScrachSpace; const bool m_bUseRingBufferDescriptorHeaps; public: // cached feature data union ArchitectureFlags { UINT Flags = 0; struct { UINT isTileBasedRenderer : 1; UINT isUMA : 1; UINT iscacheCoherentUMA : 1; UINT isIsolatedMMU : 1; }; }const m_architecture; const D3D12_FEATURE_DATA_D3D12_OPTIONS13& GetOptions13() const { return m_options13; } private: // helper methods to initialize feature data ArchitectureFlags QueryArchitectureFlags(); D3D12_FEATURE_DATA_D3D12_OPTIONS13 m_options13; }; DEFINE_ENUM_FLAG_OPERATORS(ImmediateContext::UpdateSubresourcesFlags); struct SafeRenameResourceCookie { SafeRenameResourceCookie(Resource* c = nullptr) : m_c(c) { } Resource* Detach() { auto c = m_c; m_c = nullptr; return c; } Resource* Get() { return m_c; } void Delete() { if (m_c) { m_c->m_pParent->DeleteRenameCookie(m_c); m_c = nullptr; } } void Reset(Resource* c) { Delete(); m_c = c; } ~SafeRenameResourceCookie() { Delete(); } Resource* m_c = nullptr; }; } // namespace D3D12TranslationLayer ================================================ FILE: include/ImmediateContext.inl ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#pragma once

namespace D3D12TranslationLayer
{

//----------------------------------------------------------------------------------------------------------------------------------
// GetViewList specializations: each returns the list head on which bound views of one kind are linked
// (shader resource, render target and unordered access views respectively).
template<> inline LIST_ENTRY& CResourceBindings::GetViewList() { return m_ShaderResourceViewList; }
template<> inline LIST_ENTRY& CResourceBindings::GetViewList() { return m_RenderTargetViewList; }
template<> inline LIST_ENTRY& CResourceBindings::GetViewList() { return m_UnorderedAccessViewList; }

// GetBindFunc specializations: each returns the CSubresourceBindings member function applied per
// subresource when a view of that kind is bound. Only the shader-resource variant looks at the stage,
// to pick between the pixel-shader and non-pixel-shader functions.
template<> inline CSubresourceBindings::BindFunc CResourceBindings::GetBindFunc(EShaderStage stage)
{
    return (stage == e_PS) ?
        &CSubresourceBindings::PixelShaderResourceViewBound :
        &CSubresourceBindings::NonPixelShaderResourceViewBound;
}
template<> inline CSubresourceBindings::BindFunc CResourceBindings::GetBindFunc(EShaderStage)
{
    return &CSubresourceBindings::RenderTargetViewBound;
}
template<> inline CSubresourceBindings::BindFunc CResourceBindings::GetBindFunc(EShaderStage)
{
    return &CSubresourceBindings::UnorderedAccessViewBound;
}

// GetUnbindFunc specializations: the counterparts of GetBindFunc, applied when a view is unbound.
template<> inline CSubresourceBindings::BindFunc CResourceBindings::GetUnbindFunc(EShaderStage stage)
{
    return (stage == e_PS) ?
        &CSubresourceBindings::PixelShaderResourceViewUnbound :
        &CSubresourceBindings::NonPixelShaderResourceViewUnbound;
}
template<> inline CSubresourceBindings::BindFunc CResourceBindings::GetUnbindFunc(EShaderStage)
{
    return &CSubresourceBindings::RenderTargetViewUnbound;
}
template<> inline CSubresourceBindings::BindFunc CResourceBindings::GetUnbindFunc(EShaderStage)
{
    return &CSubresourceBindings::DepthStencilViewUnbound;
}
template<> inline CSubresourceBindings::BindFunc CResourceBindings::GetUnbindFunc(EShaderStage)
{
    return &CSubresourceBindings::UnorderedAccessViewUnbound;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Records that pView has been bound: bumps the view's bind refcount, links the view into the
// per-kind view list if it is not already linked, and applies the bind function for this stage
// to the view's subresources.
template void CResourceBindings::ViewBound(View* pView, EShaderStage stage, UINT /*slot*/)
{
    auto& viewBindings = pView->m_currentBindings;
    pView->IncrementBindRefs();
    if (!viewBindings.IsViewBound())
    {
        D3D12TranslationLayer::InsertHeadList(&GetViewList(), &viewBindings.m_ViewBindingList);
    }
    CViewSubresourceSubset &viewSubresources = pView->m_subresources;
    ViewBoundCommon(viewSubresources, GetBindFunc(stage));
}

//----------------------------------------------------------------------------------------------------------------------------------
// Records that pView has been unbound: drops the view's bind refcount, unlinks the view from the
// per-kind view list once the refcount reaches zero (re-initializing its list entry), and applies
// the unbind function for this stage to the view's subresources.
template void CResourceBindings::ViewUnbound(View* pView, EShaderStage stage, UINT /*slot*/)
{
    auto& viewBindings = pView->m_currentBindings;
    pView->DecrementBindRefs();
    if (pView->GetBindRefs() == 0 && viewBindings.IsViewBound())
    {
        D3D12TranslationLayer::RemoveEntryList(&viewBindings.m_ViewBindingList);
        D3D12TranslationLayer::InitializeListHead(&viewBindings.m_ViewBindingList);
    }
    CViewSubresourceSubset &viewSubresources = pView->m_subresources;
    ViewUnboundCommon(viewSubresources, GetUnbindFunc(stage));
}

//----------------------------------------------------------------------------------------------------------------------------------
// Depth-stencil specialization of ViewBound. Only one DSV may be bound at a time (asserted).
// When the resource has no separate stencil plane (SubresourceMultiplier() == 1), or depth and stencil
// share the same read-only setting, the whole view is bound with a single function.
// Otherwise the view is split into a read-only subset and a writable subset, each bound with its own function.
template<> inline void CResourceBindings::ViewBound(TDSV* pView, EShaderStage, UINT /*slot*/)
{
    assert(!m_bIsDepthStencilViewBound);
    m_bIsDepthStencilViewBound = true;
    pView->IncrementBindRefs();

    CViewSubresourceSubset &viewSubresources = pView->m_subresources;

    bool bHasStencil = pView->m_pResource->SubresourceMultiplier() != 1;
    bool bReadOnlyDepth = !!(pView->GetDesc12().Flags & D3D12_DSV_FLAG_READ_ONLY_DEPTH);
    bool bReadOnlyStencil = !!(pView->GetDesc12().Flags & D3D12_DSV_FLAG_READ_ONLY_STENCIL);
    auto pfnDepthBound = bReadOnlyDepth ? &CSubresourceBindings::ReadOnlyDepthStencilViewBound : &CSubresourceBindings::WritableDepthStencilViewBound;
    if (!bHasStencil || bReadOnlyDepth == bReadOnlyStencil)
    {
        ViewBoundCommon(viewSubresources, pfnDepthBound);
        return;
    }

    CViewSubresourceSubset readSubresources(pView->GetDesc12(),
                                            pView->m_pResource->AppDesc()->MipLevels(),
                                            pView->m_pResource->AppDesc()->ArraySize(),
                                            pView->m_pResource->SubresourceMultiplier(),
                                            CViewSubresourceSubset::ReadOnly);
    CViewSubresourceSubset writeSubresources(pView->GetDesc12(),
                                             pView->m_pResource->AppDesc()->MipLevels(),
                                             pView->m_pResource->AppDesc()->ArraySize(),
                                             pView->m_pResource->SubresourceMultiplier(),
                                             CViewSubresourceSubset::WriteOnly);

    // If either of these were empty, then there would be only one type of bind required, and the (readOnlyDepth == readOnlyStencil) check would've covered it
    assert(!readSubresources.IsEmpty() && !writeSubresources.IsEmpty());

    // The counter is saved and then set to saved + 1 after the two calls, so the view is counted once.
    // NOTE(review): presumably each ViewBoundCommon call bumps m_NumViewsReferencingSubresources - confirm.
    UINT NumViewsReferencingSubresources = m_NumViewsReferencingSubresources;
    ViewBoundCommon(readSubresources, &CSubresourceBindings::ReadOnlyDepthStencilViewBound);
    ViewBoundCommon(writeSubresources, &CSubresourceBindings::WritableDepthStencilViewBound);
    m_NumViewsReferencingSubresources = NumViewsReferencingSubresources + 1;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Depth-stencil specialization of ViewUnbound: clears the "DSV bound" flag, drops the view's bind
// refcount and applies the unbind function to the view's subresources.
template<> inline void CResourceBindings::ViewUnbound(TDSV* pView, EShaderStage stage, UINT /*slot*/)
{
#if TRANSLATION_LAYER_DBG
    // View bindings aren't used for DSVs
    auto& viewBindings = pView->m_currentBindings;
    assert(!viewBindings.IsViewBound());
#endif

    assert(m_bIsDepthStencilViewBound);
    m_bIsDepthStencilViewBound = false;
    pView->DecrementBindRefs();

    CViewSubresourceSubset &viewSubresources = pView->m_subresources;
    ViewUnboundCommon(viewSubresources, GetUnbindFunc(stage));
}

//----------------------------------------------------------------------------------------------------------------------------------
// Binding helpers
// Each binder forwards bind/unbind notifications for one buffer kind (vertex, index, stream output)
// to the matching static ImmediateContext function. The shader stage is ignored, and the index
// buffer binder also ignores the slot.
//----------------------------------------------------------------------------------------------------------------------------------
inline void VBBinder::Bound(Resource* pBuffer, UINT slot, EShaderStage) { return ImmediateContext::VertexBufferBound(pBuffer, slot); }
inline void VBBinder::Unbound(Resource* pBuffer, UINT slot, EShaderStage) { return ImmediateContext::VertexBufferUnbound(pBuffer, slot); }
inline void IBBinder::Bound(Resource* pBuffer, UINT, EShaderStage) { return ImmediateContext::IndexBufferBound(pBuffer); }
inline void IBBinder::Unbound(Resource* pBuffer, UINT, EShaderStage) { return ImmediateContext::IndexBufferUnbound(pBuffer); }
inline void SOBinder::Bound(Resource* pBuffer, UINT slot, EShaderStage) { return ImmediateContext::StreamOutputBufferBound(pBuffer, slot); }
inline void SOBinder::Unbound(Resource* pBuffer, UINT slot, EShaderStage) { return ImmediateContext::StreamOutputBufferUnbound(pBuffer, slot); }

//----------------------------------------------------------------------------------------------------------------------------------
// Returns true when `bits` has at least one set bit at an index below `slot`.
// It is enough to test the lowest set bit, which is what BitScanForward reports.
inline bool BitSetLessThan(unsigned long bits, UINT slot)
{
    unsigned long index = 0;
    return BitScanForward(&index, bits) && index < slot;
}
// 64-bit overload. Uses BitScanForward64 when it is available as a macro.
// Otherwise the value is handled as two 32-bit halves: for slot < 32 only the low half matters;
// for slot >= 32 any bit in the low half qualifies, else the high half is tested against slot - 32.
inline bool BitSetLessThan(unsigned long long bits, UINT slot)
{
#ifdef BitScanForward64
    unsigned long index = 0;
    return BitScanForward64(&index, bits) && index < slot;
#else
    if (slot < 32)
    {
        return BitSetLessThan(static_cast(bits), slot);
    }
    if (static_cast(bits))
    {
        return true;
    }
    return BitSetLessThan(static_cast(bits >> 32), slot - 32);
#endif
}

//----------------------------------------------------------------------------------------------------------------------------------
// Returns true when any of the first NumBitsToCheck dirty bits is set (false when NumBitsToCheck is 0).
// Bitsets of up to 32 or 64 slots are tested as a single integer; larger ones are walked word by word
// through the bitset's _Getword accessor.
template inline bool CBoundState::DirtyBitsUpTo(_In_range_(0, NumBindings) UINT NumBitsToCheck) const noexcept
{
    if (NumBitsToCheck == 0)
    {
        return false;
    }

    if (NumBindSlots <= 32)
    {
        return BitSetLessThan(m_DirtyBits.to_ulong(), NumBitsToCheck);
    }
    else if (NumBindSlots <= 64)
    {
        return BitSetLessThan(m_DirtyBits.to_ullong(), NumBitsToCheck);
    }
    else
    {
        constexpr UINT NumBitsPerWord = sizeof(::std::conditional_t) * 8;
        // First, check whole "words" for any bit being set.
        UINT NumWordsToCheck = NumBitsToCheck / NumBitsPerWord;
        for (UINT word = 0; word < NumWordsToCheck; ++word)
        {
            if (m_DirtyBits._Getword(word))
            {
                return true;
            }
            // Whatever is left after the loop is the bit count to test in the final, partial word.
            NumBitsToCheck -= NumBitsPerWord;
        }
        // The slot we were asking about was the last bit of the last word we checked.
        if (NumBitsToCheck == 0)
        {
            return false;
        }
        // Check for bits inside a word.
        return BitSetLessThan(m_DirtyBits._Getword(NumWordsToCheck), NumBitsToCheck);
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// For every non-null binding in the first m_NumBound slots, asks the binding's parent
// ImmediateContext to transition that resource for bindings.
template void CBoundState::ReassertResourceState() const noexcept
{
    for (UINT i = 0; i < m_NumBound; ++i)
    {
        if (m_Bound[i])
        {
            ImmediateContext* pDevice = m_Bound[i]->m_pParent;
            pDevice->TransitionResourceForBindings(m_Bound[i]);
        }
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Replaces the binding in `slot`. When the base class reports a change, the binder is notified that
// the previous binding was unbound and the new one bound. Returns true when the binding changed.
template inline bool CSimpleBoundState::UpdateBinding(_In_range_(0, NumBindings-1) UINT slot, _In_opt_ TBindable* pBindable, EShaderStage stage) noexcept
{
    // Captured before the base-class update so the old binding can still be reported as unbound.
    auto pCurrent = this->m_Bound[slot];
    if (__super::UpdateBinding(slot, pBindable))
    {
        TBinder::Unbound(pCurrent, slot, stage);
        TBinder::Bound(pBindable, slot, stage);
        return true;
    }
    return false;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Replaces the view in `slot`. A non-null view raises m_NumBound to cover the slot. When the view
// actually changes, the old view (if any) is told it was unbound, the new one (if any) that it was
// bound, and the slot's dirty bit is set. Returns true when the binding changed.
template inline bool CViewBoundState::UpdateBinding(_In_range_(0, NumBindings-1) UINT slot, _In_opt_ TBindable* pBindable, EShaderStage stage) noexcept
{
    auto& Current = this->m_Bound[slot];
    if (pBindable)
    {
        this->m_NumBound = max(this->m_NumBound, slot + 1);
    }
    if (Current != pBindable)
    {
        if (Current) Current->ViewUnbound(slot, stage);
        if (pBindable) pBindable->ViewBound(slot, stage);
        Current = pBindable;

        // We skip calling TrimNumBound because we just use shader data to determine the actual amount to bind
        this->m_DirtyBits.set(slot);
        return true;
    }
    return false;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Merges the shader's declarations (New) into the cached m_ShaderData, resizes the cache to
// rootSignatureBucketSize, and reports whether the bindings must be treated as dirty.
// Returns true when bKnownDirty was set, when the cache grew, when a changed declaration sits on an
// unbound slot, or when any slot dirty bit below rootSignatureBucketSize is set.
template inline bool CViewBoundState::IsDirty(TDeclVector const& New, UINT rootSignatureBucketSize, bool bKnownDirty) noexcept
{
    // Note: Even though there are vector resize ops here, they cannot throw,
    // since the backing memory for the vector was already allocated using reserve(NumBindSlots)
    bool bDirty = bKnownDirty;
    for (size_t i = 0; i < New.size(); ++i)
    {
        if (i >= m_ShaderData.size())
        {
            // We've never bound this many before
            m_ShaderData.insert(m_ShaderData.end(), New.begin() + i, New.end());
            bDirty = true;
            break;
        }
        // Don't overwrite typed NULLs with untyped NULLs,
        // any type will work to fill a slot that won't be used
        if (m_ShaderData[i] != New[i] &&
            New[i] != c_AnyNull)
        {
            m_ShaderData[i] = New[i];
            bDirty |= this->m_Bound[i] == nullptr;
        }
    }

    if (m_ShaderData.size() < rootSignatureBucketSize)
    {
        // Did we move to a larger bucket size? If so, fill the extra shader data to null (unknown) resource dimension
        m_ShaderData.resize(rootSignatureBucketSize, c_AnyNull);
        bDirty = true;
    }
    else if (m_ShaderData.size() > rootSignatureBucketSize)
    {
        // Did we move to a smaller bucket size? If so, shrink the shader data to fit
        // Don't need to mark as dirty since the root signature won't be able to address the stale descriptors
        m_ShaderData.resize(rootSignatureBucketSize);
    }

    if (!bDirty)
    {
        bDirty = this->DirtyBitsUpTo(static_cast(rootSignatureBucketSize));
    }

    return bDirty;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Replaces the constant buffer in `slot`. A non-null buffer raises m_NumBound to cover the slot.
// When the buffer actually changes, ImmediateContext is notified of the unbind and the bind (both
// are called even when the respective pointer is null) and the slot's dirty bit is set.
// Returns true when the binding changed.
inline bool CConstantBufferBoundState::UpdateBinding(_In_range_(0, NumBindings-1) UINT slot, _In_opt_ Resource* pBindable, EShaderStage stage) noexcept
{
    auto& Current = m_Bound[slot];
    if (pBindable)
    {
        m_NumBound = max(m_NumBound, slot + 1);
    }
    if (Current != pBindable)
    {
        ImmediateContext::ConstantBufferUnbound(Current, slot, stage);
        ImmediateContext::ConstantBufferBound(pBindable, slot, stage);
        Current = pBindable;

        // We skip calling TrimNumBound because we just use shader data to determine the actual amount to bind
        m_DirtyBits.set(slot);
        return true;
    }
    return false;
}
//---------------------------------------------------------------------------------------------------------------------------------- inline bool CSamplerBoundState::UpdateBinding(_In_range_(0, NumBindings-1) UINT slot, _In_ Sampler* pBindable) noexcept { auto& Current = m_Bound[slot]; if (pBindable) { m_NumBound = max(m_NumBound, slot + 1); } if (Current != pBindable) { Current = pBindable; // We skip calling TrimNumBound because we just use shader data to determine the actual amount to bind m_DirtyBits.set(slot); return true; } return false; } inline ID3D12Resource* GetUnderlyingResource(Resource* pResource) { if (!pResource) return nullptr; return pResource->GetUnderlyingResource(); } //---------------------------------------------------------------------------------------------------------------------------------- inline void TRANSLATION_API ImmediateContext::DrawInstanced(UINT countPerInstance, UINT instanceCount, UINT vertexStart, UINT instanceStart) { try { PreDraw(); // throw ( _com_error ) GetGraphicsCommandList()->DrawInstanced(countPerInstance, instanceCount, vertexStart, instanceStart); PostDraw(); } catch (_com_error) {} // already handled, but can't touch the command list } //---------------------------------------------------------------------------------------------------------------------------------- inline void TRANSLATION_API ImmediateContext::Draw(UINT count, UINT vertexStart) { DrawInstanced(count, 1, vertexStart, 0); } //---------------------------------------------------------------------------------------------------------------------------------- inline void TRANSLATION_API ImmediateContext::DrawIndexed(UINT indexCount, UINT indexStart, INT vertexStart) { DrawIndexedInstanced(indexCount, 1, indexStart, vertexStart, 0); } //---------------------------------------------------------------------------------------------------------------------------------- inline void TRANSLATION_API ImmediateContext::DrawIndexedInstanced(UINT countPerInstance, 
UINT instanceCount, UINT indexStart, INT vertexStart, UINT instanceStart) { try { PreDraw(); // throw ( _com_error ) GetGraphicsCommandList()->DrawIndexedInstanced(countPerInstance, instanceCount, indexStart, vertexStart, instanceStart); PostDraw(); } catch (_com_error) {} // already handled, but can't touch the command list } //---------------------------------------------------------------------------------------------------------------------------------- inline void TRANSLATION_API ImmediateContext::DrawAuto() { // If there is no VB bound, then skip Resource* pVertexBuffer = m_CurrentState.m_VBs.GetBound()[0]; if (!pVertexBuffer) { return; } try { #ifdef USE_PIX PIXScopedEvent(GetGraphicsCommandList(), 0ull, L"DrawAuto"); #endif EnsureDrawAutoResources(); // throw( _com_error ) EnsureExecuteIndirectResources(); // throw( _com_error ) // Transition the vertex buffer to the UAV state m_ResourceStateManager.TransitionResource(pVertexBuffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); m_ResourceStateManager.ApplyAllResourceTransitions(); UINT VBOffset = m_auVertexOffsets[0] + GetDynamicBufferOffset(pVertexBuffer); UINT VBStride = m_auVertexStrides[0]; UINT Constants[] = { VBOffset, VBStride }; assert(0 == (pVertexBuffer->GetOffsetToStreamOutputSuffix() % sizeof(UINT))); static_assert(0 == (sizeof(SStreamOutputSuffix) % sizeof(UINT)), "Suffix must be UINT-aligned"); FormatBuffer( pVertexBuffer->GetUnderlyingResource(), m_pDrawAutoPSO.get(), pVertexBuffer->GetOffsetToStreamOutputSuffix() / sizeof(UINT), sizeof(SStreamOutputSuffix) / sizeof(UINT), Constants ); // throw( _com_error ) // This will transition the vertex buffer back to the correct state (DEFAULT_READ | INDIRECT_ARGUMENT) PreDraw(); // throw ( _com_error ) GetGraphicsCommandList()->ExecuteIndirect( m_pDrawInstancedCommandSignature.get(), 1, pVertexBuffer->GetUnderlyingResource(), pVertexBuffer->GetOffsetToStreamOutputSuffix() + offsetof(SStreamOutputSuffix, VertexCountPerInstance), nullptr, 0 ); PostDraw(); } 
catch (_com_error) {} // already handled, but can't touch the command list } //---------------------------------------------------------------------------------------------------------------------------------- inline void TRANSLATION_API ImmediateContext::DrawIndexedInstancedIndirect(Resource* pBuffer, UINT offset) { m_ResourceStateManager.TransitionResource(pBuffer, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); try { EnsureExecuteIndirectResources(); // throw( _com_error ) PreDraw(); // throw ( _com_error ) auto pAPIBuffer = GetUnderlyingResource(pBuffer); GetGraphicsCommandList()->ExecuteIndirect( m_pDrawIndexedInstancedCommandSignature.get(), 1, pAPIBuffer, offset + GetDynamicBufferOffset(pBuffer), nullptr, 0 ); PostDraw(); } catch (_com_error) {} // already handled, but can't touch the command list } //---------------------------------------------------------------------------------------------------------------------------------- inline void TRANSLATION_API ImmediateContext::DrawInstancedIndirect(Resource* pBuffer, UINT offset) { m_ResourceStateManager.TransitionResource(pBuffer, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); try { EnsureExecuteIndirectResources(); // throw( _com_error ) PreDraw(); // throw ( _com_error ) auto pAPIBuffer = GetUnderlyingResource(pBuffer); GetGraphicsCommandList()->ExecuteIndirect( m_pDrawInstancedCommandSignature.get(), 1, pAPIBuffer, offset + GetDynamicBufferOffset(pBuffer), nullptr, 0 ); PostDraw(); } catch (_com_error) {} // already handled, but can't touch the command list } //---------------------------------------------------------------------------------------------------------------------------------- inline void ImmediateContext::InsertUAVBarriersIfNeeded(CViewBoundState& UAVBindings, UINT NumUAVs) noexcept { // Insert UAV barriers if necessary, and indicate UAV barriers will be necessary next time // TODO: Optimizations here could avoid inserting barriers on read-after-read auto pUAVs = UAVBindings.GetBound(); 
// NOTE(review): this excerpt opens inside a function whose signature lies above it; the statements down to the
// first closing brace at column 0 are that function's tail. As visible here, they queue a UAV barrier for every
// bound UAV whose m_LastUAVAccess already equals the current graphics command list ID, stamp each bound UAV with
// that ID, and then record all queued barriers with a single ResourceBarrier call.
    m_vUAVBarriers.clear();
    for (UINT i = 0; i < NumUAVs; ++i)
    {
        if (pUAVs[i])
        {
            if (pUAVs[i]->m_pResource->m_Identity->m_LastUAVAccess == GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS))
            {
                m_vUAVBarriers.push_back({ D3D12_RESOURCE_BARRIER_TYPE_UAV });
                m_vUAVBarriers.back().UAV.pResource = pUAVs[i]->m_pResource->GetUnderlyingResource();
            }
            pUAVs[i]->m_pResource->m_Identity->m_LastUAVAccess = GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS);
        }
    }
    if (m_vUAVBarriers.size())
    {
        GetGraphicsCommandList()->ResourceBarrier((UINT)m_vUAVBarriers.size(), m_vUAVBarriers.data());
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Brings the graphics state on the command list up to date before a draw is recorded.
// Steps, in order:
//   1. PreRender for the graphics command list.
//   2. Derive pipeline dirty bits: from a root signature change (only binding kinds whose count grew are dirtied,
//      or everything if the previous root signature is unknown) and from per-slot dirty state of each stage.
//   3. Reserve view/sampler heap slots, insert UAV barriers, and write the dirty descriptor tables.
//   4. Apply pending resource transitions.
//   5. Set root signature, PSO, descriptor tables and fixed-function state on the command list.
//   6. Clear the e_GraphicsStateDirty bits from both m_StatesToReassert and m_DirtyStates.
// Throws _com_error: from ReserveSlotsForBindings (per the inline notes) and explicitly when the PSO's GetForUse
// returns null.
// NOTE(review): explicit template arguments (on GetShader / GetShaderStage / std::extent / reinterpret_cast and on
// the Dirty*Helper / Apply*Helper calls) appear to be missing from this listing, which is why the five per-stage
// calls in each group look identical - confirm against the upstream file before building.
inline void ImmediateContext::PreDraw() noexcept(false)
{
#ifdef USE_PIX
    PIXScopedEvent(GetGraphicsCommandList(), 0ull, L"PreDraw");
#endif
    PreRender(COMMAND_LIST_TYPE::GRAPHICS);

    if (m_bUseRingBufferDescriptorHeaps)
    {
        // Always dirty the state when using ring buffer heaps because we can't safely reuse tables in that case.
        m_DirtyStates |= EDirtyBits::e_HeapBindingsDirty;
    }

    if (m_DirtyStates & e_GraphicsRootSignatureDirty)
    {
        m_StatesToReassert |= e_GraphicsBindingsDirty; // All bindings need to be reapplied
        if (m_CurrentState.m_pLastGraphicsRootSig == nullptr)
        {
            // We don't know, so we have to be conservative.
            m_DirtyStates |= e_GraphicsBindingsDirty;
        }
        else if (m_CurrentState.m_pLastGraphicsRootSig != m_CurrentState.m_pPSO->GetRootSignature())
        {
            RootSignatureDesc const& OldDesc = m_CurrentState.m_pLastGraphicsRootSig->m_Desc;
            RootSignatureDesc const& NewDesc = m_CurrentState.m_pPSO->GetRootSignature()->m_Desc;
            // Per-stage dirty bits, indexed in the same order as m_ShaderStages below.
            static constexpr UINT64 CBDirtyBits[] = { e_PSConstantBuffersDirty, e_VSConstantBuffersDirty, e_GSConstantBuffersDirty, e_HSConstantBuffersDirty, e_DSConstantBuffersDirty };
            static constexpr UINT64 SRVDirtyBits[] = { e_PSShaderResourcesDirty, e_VSShaderResourcesDirty, e_GSShaderResourcesDirty, e_HSShaderResourcesDirty, e_DSShaderResourcesDirty };
            static constexpr UINT64 SamplerDirtyBits[] = { e_PSSamplersDirty, e_VSSamplersDirty, e_GSSamplersDirty, e_HSSamplersDirty, e_DSSamplersDirty };
            // Only a binding count that grew relative to the previous root signature dirties that table.
            for (UINT i = 0; i < std::extent::value; ++i)
            {
                if (NewDesc.m_ShaderStages[i].GetCBBindingCount() > OldDesc.m_ShaderStages[i].GetCBBindingCount())
                {
                    m_DirtyStates |= CBDirtyBits[i];
                }
                if (NewDesc.m_ShaderStages[i].GetSRVBindingCount() > OldDesc.m_ShaderStages[i].GetSRVBindingCount())
                {
                    m_DirtyStates |= SRVDirtyBits[i];
                }
                if (NewDesc.m_ShaderStages[i].GetSamplerBindingCount() > OldDesc.m_ShaderStages[i].GetSamplerBindingCount())
                {
                    m_DirtyStates |= SamplerDirtyBits[i];
                }
            }
            if (NewDesc.GetUAVBindingCount() > OldDesc.GetUAVBindingCount())
            {
                m_DirtyStates |= e_UnorderedAccessViewsDirty;
            }
        }
        m_CurrentState.m_pLastGraphicsRootSig = m_CurrentState.m_pPSO->GetRootSignature();
    }

    auto& RootSigDesc = m_CurrentState.m_pPSO->GetRootSignature()->m_Desc;

    // Shader-declared bindings do not set pipeline dirty bits at bind time, only slot dirty bits
    // These slot dirty bits are only interesting if they are below the maximum shader-declared slot
    // Translate slot dirty bits to pipeline dirty bits now, since we know the shader declarations
    auto pfnSetDirtySRVBindings = [this](SStageState& Stage, SShaderDecls* pShader, const RootSignatureDesc::ShaderStage& shaderStage, EDirtyBits eBit)
    {
        // A missing shader is treated as having no resource declarations.
        const TDeclVector EmptyDecls;
        if (Stage.m_SRVs.IsDirty(pShader ? pShader->m_ResourceDecls : EmptyDecls, shaderStage.GetSRVBindingCount(), !!(m_DirtyStates & eBit)))
        {
            m_DirtyStates |= eBit;
        }
    };
    auto pfnSetDirtyCBBindings = [this](SStageState& Stage, const RootSignatureDesc::ShaderStage& shaderStage, EDirtyBits eBit)
    {
        if (Stage.m_CBs.IsDirty(shaderStage.GetCBBindingCount()))
        {
            m_DirtyStates |= eBit;
        }
    };
    auto pfnSetDirtySamplerBindings = [this](SStageState& Stage, const RootSignatureDesc::ShaderStage& shaderStage, EDirtyBits eBit)
    {
        if (Stage.m_Samplers.IsDirty(shaderStage.GetSamplerBindingCount()))
        {
            m_DirtyStates |= eBit;
        }
    };
    pfnSetDirtySRVBindings(m_CurrentState.m_PS, m_CurrentState.m_pPSO->GetShader(), RootSigDesc.GetShaderStage(), e_PSShaderResourcesDirty);
    pfnSetDirtySRVBindings(m_CurrentState.m_VS, m_CurrentState.m_pPSO->GetShader(), RootSigDesc.GetShaderStage(), e_VSShaderResourcesDirty);
    pfnSetDirtySRVBindings(m_CurrentState.m_GS, m_CurrentState.m_pPSO->GetShader(), RootSigDesc.GetShaderStage(), e_GSShaderResourcesDirty);
    pfnSetDirtySRVBindings(m_CurrentState.m_HS, m_CurrentState.m_pPSO->GetShader(), RootSigDesc.GetShaderStage(), e_HSShaderResourcesDirty);
    pfnSetDirtySRVBindings(m_CurrentState.m_DS, m_CurrentState.m_pPSO->GetShader(), RootSigDesc.GetShaderStage(), e_DSShaderResourcesDirty);
    pfnSetDirtyCBBindings(m_CurrentState.m_PS, RootSigDesc.GetShaderStage(), e_PSConstantBuffersDirty);
    pfnSetDirtyCBBindings(m_CurrentState.m_VS, RootSigDesc.GetShaderStage(), e_VSConstantBuffersDirty);
    pfnSetDirtyCBBindings(m_CurrentState.m_GS, RootSigDesc.GetShaderStage(), e_GSConstantBuffersDirty);
    pfnSetDirtyCBBindings(m_CurrentState.m_HS, RootSigDesc.GetShaderStage(), e_HSConstantBuffersDirty);
    pfnSetDirtyCBBindings(m_CurrentState.m_DS, RootSigDesc.GetShaderStage(), e_DSConstantBuffersDirty);
    pfnSetDirtySamplerBindings(m_CurrentState.m_PS, RootSigDesc.GetShaderStage(), e_PSSamplersDirty);
    pfnSetDirtySamplerBindings(m_CurrentState.m_VS, RootSigDesc.GetShaderStage(), e_VSSamplersDirty);
    pfnSetDirtySamplerBindings(m_CurrentState.m_GS, RootSigDesc.GetShaderStage(), e_GSSamplersDirty);
    pfnSetDirtySamplerBindings(m_CurrentState.m_HS, RootSigDesc.GetShaderStage(), e_HSSamplersDirty);
    pfnSetDirtySamplerBindings(m_CurrentState.m_DS, RootSigDesc.GetShaderStage(), e_DSSamplersDirty);

    // Note: UAV decl scratch memory is reserved upfront, so these operations will not throw.
    m_UAVDeclScratch.clear();
    // Merges one shader's UAV declarations into the scratch list: slots beyond the current scratch size are
    // appended wholesale, and existing slots are only overwritten while they are still UNKNOWN.
    auto pfnMergeUAVDecls = [this](SShaderDecls* pShader)
    {
        if (pShader)
        {
            for (size_t i = 0; i < pShader->m_UAVDecls.size(); ++i)
            {
                if (i >= m_UAVDeclScratch.size())
                {
                    m_UAVDeclScratch.insert(m_UAVDeclScratch.end(), pShader->m_UAVDecls.begin() + i, pShader->m_UAVDecls.end());
                    return;
                }
                else if (m_UAVDeclScratch[i] == RESOURCE_DIMENSION::UNKNOWN)
                {
                    m_UAVDeclScratch[i] = pShader->m_UAVDecls[i];
                }
            }
        }
    };
    pfnMergeUAVDecls(m_CurrentState.m_pPSO->GetShader());
    pfnMergeUAVDecls(m_CurrentState.m_pPSO->GetShader());
    pfnMergeUAVDecls(m_CurrentState.m_pPSO->GetShader());
    pfnMergeUAVDecls(m_CurrentState.m_pPSO->GetShader());
    pfnMergeUAVDecls(m_CurrentState.m_pPSO->GetShader());
    if (m_CurrentState.m_UAVs.IsDirty(m_UAVDeclScratch, RootSigDesc.GetUAVBindingCount(), !!(m_DirtyStates & e_UnorderedAccessViewsDirty)))
    {
        m_DirtyStates |= e_UnorderedAccessViewsDirty;
    }

    // Now that pipeline dirty bits are set appropriately, check if we need to update the descriptor heap
    UINT ViewHeapSlot = ReserveSlotsForBindings(m_ViewHeap, &ImmediateContext::CalculateViewSlotsForBindings); // throw( _com_error )
    UINT SamplerHeapSlot = ReserveSlotsForBindings(m_SamplerHeap, &ImmediateContext::CalculateSamplerSlotsForBindings); // throw( _com_error )

    auto& UAVBindings = m_CurrentState.m_UAVs;
    UINT numUAVs = RootSigDesc.GetUAVBindingCount();
    InsertUAVBarriersIfNeeded(UAVBindings, numUAVs);

    // Update the descriptor heap, and apply dirty bindings
    if (m_DirtyStates & e_GraphicsBindingsDirty)
    {
        // Each helper advances the heap slot it is given, so the order of these calls fixes the table layout.
        // SRVs
        DirtyShaderResourcesHelper(ViewHeapSlot);
        DirtyShaderResourcesHelper(ViewHeapSlot);
        DirtyShaderResourcesHelper(ViewHeapSlot);
        DirtyShaderResourcesHelper(ViewHeapSlot);
        DirtyShaderResourcesHelper(ViewHeapSlot);

        // CBs
        DirtyConstantBuffersHelper(ViewHeapSlot);
        DirtyConstantBuffersHelper(ViewHeapSlot);
        DirtyConstantBuffersHelper(ViewHeapSlot);
        DirtyConstantBuffersHelper(ViewHeapSlot);
        DirtyConstantBuffersHelper(ViewHeapSlot);

        // Samplers
        DirtySamplersHelper(SamplerHeapSlot);
        DirtySamplersHelper(SamplerHeapSlot);
        DirtySamplersHelper(SamplerHeapSlot);
        DirtySamplersHelper(SamplerHeapSlot);
        DirtySamplersHelper(SamplerHeapSlot);

        if (m_DirtyStates & e_UnorderedAccessViewsDirty)
        {
            auto& UAVTableBase = m_CurrentState.m_UAVTableBase;

            static const UINT MaxUAVs = UAVBindings.NumBindings;
            assert(ViewHeapSlot + numUAVs <= m_ViewHeap.m_Desc.NumDescriptors);

            D3D12_CPU_DESCRIPTOR_HANDLE UAVDescriptors[MaxUAVs];
            UAVBindings.FillDescriptors(UAVDescriptors, m_NullUAVs, RootSigDesc.GetUAVBindingCount());

            UAVTableBase = m_ViewHeap.GPUHandle(ViewHeapSlot);
            D3D12_CPU_DESCRIPTOR_HANDLE UAVTableBaseCPU = m_ViewHeap.CPUHandle(ViewHeapSlot);

            // One destination range of numUAVs descriptors, fed from numUAVs single-descriptor sources.
            m_pDevice12->CopyDescriptors(1, &UAVTableBaseCPU, &numUAVs,
                numUAVs, UAVDescriptors, nullptr /*sizes*/,
                m_ViewHeap.m_Desc.Type);

            ViewHeapSlot += numUAVs;
        }
    }

    // Now the current state is up to date, let's apply it to the command list
    m_StatesToReassert |= (m_DirtyStates & e_GraphicsStateDirty);

    if (m_StatesToReassert & e_FirstDraw)
    {
        m_CurrentState.m_PS.m_SRVs.ReassertResourceState();
        m_CurrentState.m_VS.m_SRVs.ReassertResourceState();
        m_CurrentState.m_GS.m_SRVs.ReassertResourceState();
        m_CurrentState.m_HS.m_SRVs.ReassertResourceState();
        m_CurrentState.m_DS.m_SRVs.ReassertResourceState();

        m_CurrentState.m_PS.m_CBs.ReassertResourceState();
        m_CurrentState.m_VS.m_CBs.ReassertResourceState();
        m_CurrentState.m_GS.m_CBs.ReassertResourceState();
        m_CurrentState.m_HS.m_CBs.ReassertResourceState();
        m_CurrentState.m_DS.m_CBs.ReassertResourceState();

        m_CurrentState.m_UAVs.ReassertResourceState();
        m_CurrentState.m_VBs.ReassertResourceState();
        m_CurrentState.m_IB.ReassertResourceState();
        m_CurrentState.m_RTVs.ReassertResourceState();
        m_CurrentState.m_DSVs.ReassertResourceState();
        m_CurrentState.m_SO.ReassertResourceState();
    }

    m_ResourceStateManager.ApplyAllResourceTransitions(true);

    if (m_StatesToReassert & e_GraphicsStateDirty)
    {
        if (m_StatesToReassert & e_GraphicsRootSignatureDirty)
        {
            GetGraphicsCommandList()->SetGraphicsRootSignature(m_CurrentState.m_pPSO->GetRootSignature()->GetForImmediateUse());
            // Setting a root signature forces every table to be re-bound below.
            m_StatesToReassert |= e_GraphicsBindingsDirty;
        }

        if (m_StatesToReassert & e_PipelineStateDirty)
        {
            auto pPSO = m_CurrentState.m_pPSO->GetForUse(COMMAND_LIST_TYPE::GRAPHICS);
            if (!pPSO)
            {
                // NOTE(review): S_OK is thrown deliberately-looking here; Dispatch's catch comment ("already
                // handled") suggests the failure was reported elsewhere - confirm.
                throw _com_error(S_OK);
            }
            GetGraphicsCommandList()->SetPipelineState(pPSO);
        }

        RefreshNonHeapBindings(m_StatesToReassert);

        // SRVs
        ApplyShaderResourcesHelper();
        ApplyShaderResourcesHelper();
        ApplyShaderResourcesHelper();
        ApplyShaderResourcesHelper();
        ApplyShaderResourcesHelper();

        // CBs
        ApplyConstantBuffersHelper();
        ApplyConstantBuffersHelper();
        ApplyConstantBuffersHelper();
        ApplyConstantBuffersHelper();
        ApplyConstantBuffersHelper();

        // Samplers
        ApplySamplersHelper();
        ApplySamplersHelper();
        ApplySamplersHelper();
        ApplySamplersHelper();
        ApplySamplersHelper();

        if (m_StatesToReassert & e_UnorderedAccessViewsDirty)
        {
            static const UINT UAVTableIndex = 15;
            auto const& UAVTableBase = m_CurrentState.m_UAVTableBase;

            GetGraphicsCommandList()->SetGraphicsRootDescriptorTable(UAVTableIndex, UAVTableBase);
        }

        // States that cannot be dirty (no recomputing necessary)
        if (m_StatesToReassert & e_PrimitiveTopologyDirty)
        {
            GetGraphicsCommandList()->IASetPrimitiveTopology(m_PrimitiveTopology);
        }

        if (m_StatesToReassert & e_BlendFactorDirty)
        {
            GetGraphicsCommandList()->OMSetBlendFactor(m_BlendFactor);
        }

        if (m_StatesToReassert & e_StencilRefDirty)
        {
            GetGraphicsCommandList()->OMSetStencilRef(m_uStencilRef);
        }

        if (m_StatesToReassert & e_ViewportsDirty)
        {
            GetGraphicsCommandList()->RSSetViewports(m_uNumViewports, reinterpret_cast(m_aViewports));
        }

        if (m_StatesToReassert & e_ScissorRectsDirty)
        {
            SetScissorRectsHelper();
        }
    }

    m_StatesToReassert &= ~e_GraphicsStateDirty;
    m_DirtyStates &= ~e_GraphicsStateDirty;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Returns how many view-heap descriptors the currently dirty bindings need.
// For every dirty bit set in m_DirtyStates, the matching binding count from the current PSO's root signature
// description is added: the e_CS* SRV/CB/UAV counts when bDispatch is true, otherwise the five per-stage SRV and
// CB counts plus the graphics UAV count.
// NOTE(review): bDispatch is not declared in the visible text - the template parameter list (and the
// GetShaderStage template arguments) appear to be missing from this listing.
template
inline UINT ImmediateContext::CalculateViewSlotsForBindings() noexcept
{
    UINT NumRequiredSlots = 0;
    // Adds 'count' only when 'dirtyBit' is currently set.
    auto pfnAccumulate = [this, &NumRequiredSlots](UINT dirtyBit, UINT count)
    {
        if (m_DirtyStates & dirtyBit) { NumRequiredSlots += count; }
    };
    auto& RootSigDesc = m_CurrentState.m_pPSO->GetRootSignature()->m_Desc;
    if (bDispatch)
    {
        pfnAccumulate(e_CSShaderResourcesDirty, RootSigDesc.GetShaderStage().GetSRVBindingCount());
        pfnAccumulate(e_CSConstantBuffersDirty, RootSigDesc.GetShaderStage().GetCBBindingCount());
        pfnAccumulate(e_CSUnorderedAccessViewsDirty, RootSigDesc.GetUAVBindingCount());
    }
    else
    {
        pfnAccumulate(e_PSShaderResourcesDirty, RootSigDesc.GetShaderStage().GetSRVBindingCount());
        pfnAccumulate(e_VSShaderResourcesDirty, RootSigDesc.GetShaderStage().GetSRVBindingCount());
        pfnAccumulate(e_GSShaderResourcesDirty, RootSigDesc.GetShaderStage().GetSRVBindingCount());
        pfnAccumulate(e_HSShaderResourcesDirty, RootSigDesc.GetShaderStage().GetSRVBindingCount());
        pfnAccumulate(e_DSShaderResourcesDirty, RootSigDesc.GetShaderStage().GetSRVBindingCount());

        pfnAccumulate(e_PSConstantBuffersDirty, RootSigDesc.GetShaderStage().GetCBBindingCount());
        pfnAccumulate(e_VSConstantBuffersDirty, RootSigDesc.GetShaderStage().GetCBBindingCount());
        pfnAccumulate(e_GSConstantBuffersDirty, RootSigDesc.GetShaderStage().GetCBBindingCount());
        pfnAccumulate(e_HSConstantBuffersDirty, RootSigDesc.GetShaderStage().GetCBBindingCount());
        pfnAccumulate(e_DSConstantBuffersDirty, RootSigDesc.GetShaderStage().GetCBBindingCount());

        pfnAccumulate(e_UnorderedAccessViewsDirty, RootSigDesc.GetUAVBindingCount());
    }
    return NumRequiredSlots;
}

// Sampler-heap counterpart of CalculateViewSlotsForBindings: sums the sampler binding counts of every stage whose
// sampler dirty bit is set (the e_CS bit only when bDispatch is true, the five graphics stage bits otherwise).
// NOTE(review): template parameter list / template arguments appear to be missing from this listing.
template
inline UINT ImmediateContext::CalculateSamplerSlotsForBindings() noexcept
{
    UINT NumRequiredSlots = 0;
    auto pfnAccumulate = [this, &NumRequiredSlots](UINT dirtyBit, UINT count)
    {
        if (m_DirtyStates & dirtyBit) { NumRequiredSlots += count; }
    };
    auto& RootSigDesc = m_CurrentState.m_pPSO->GetRootSignature()->m_Desc;
    if (bDispatch)
    {
        pfnAccumulate(e_CSSamplersDirty, RootSigDesc.GetShaderStage().GetSamplerBindingCount());
    }
    else
    {
        pfnAccumulate(e_PSSamplersDirty, RootSigDesc.GetShaderStage().GetSamplerBindingCount());
        pfnAccumulate(e_VSSamplersDirty, RootSigDesc.GetShaderStage().GetSamplerBindingCount());
        pfnAccumulate(e_GSSamplersDirty, RootSigDesc.GetShaderStage().GetSamplerBindingCount());
        pfnAccumulate(e_HSSamplersDirty, RootSigDesc.GetShaderStage().GetSamplerBindingCount());
        pfnAccumulate(e_DSSamplersDirty, RootSigDesc.GetShaderStage().GetSamplerBindingCount());
    }
    return NumRequiredSlots;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Writes one stage's SRV descriptor table into the view heap when that stage's c_ShaderResourcesDirty bit is set.
// HeapSlot (in/out): first free slot on entry; advanced by the root signature's SRV binding count on exit.
// The GPU handle of the written table is stored in the stage's m_SRVTableBase for the later Apply step.
// m_NullSRVs is handed to FillDescriptors - presumably the descriptors used for unbound slots; confirm.
// NOTE(review): template parameter list / template arguments appear to be missing from this listing.
template
inline void ImmediateContext::DirtyShaderResourcesHelper(UINT& HeapSlot) noexcept
{
    typedef SShaderTraits TShaderTraits;
    if ((m_DirtyStates & TShaderTraits::c_ShaderResourcesDirty) == 0)
    {
        return;
    }

    SStageState& CurrentState = TShaderTraits::CurrentStageState(m_CurrentState);
    auto& SRVBindings = CurrentState.m_SRVs;
    UINT RootSigHWM = m_CurrentState.m_pPSO->GetRootSignature()->m_Desc.GetShaderStage().GetSRVBindingCount();
    UINT numSRVs = RootSigHWM;
    static const UINT MaxSRVs = SRVBindings.NumBindings;
    assert(HeapSlot + numSRVs <= m_ViewHeap.m_Desc.NumDescriptors);

    D3D12_CPU_DESCRIPTOR_HANDLE Descriptors[MaxSRVs];
    SRVBindings.FillDescriptors(Descriptors, m_NullSRVs, RootSigHWM);

    CurrentState.m_SRVTableBase = m_ViewHeap.GPUHandle(HeapSlot);
    D3D12_CPU_DESCRIPTOR_HANDLE SRVTableBaseCPU = m_ViewHeap.CPUHandle(HeapSlot);

    m_pDevice12->CopyDescriptors(1, &SRVTableBaseCPU, &numSRVs,
        numSRVs, Descriptors, nullptr /*sizes*/,
        m_ViewHeap.m_Desc.Type);

    HeapSlot += numSRVs;
}
//----------------------------------------------------------------------------------------------------------------------------------
// Writes one stage's constant buffer table into the view heap when that stage's c_ConstantBuffersDirty bit is set.
// Unlike the SRV/sampler helpers, the views are created directly in the heap (CreateConstantBufferView) instead of
// being copied from existing descriptors. Each slot's dirty flag is reset as it is written.
// HeapSlot (in/out): first free slot on entry; advanced by the root signature's CB binding count on exit.
// NOTE(review): template parameter list / template arguments appear to be missing from this listing.
template
inline void ImmediateContext::DirtyConstantBuffersHelper(UINT& HeapSlot) noexcept
{
    typedef SShaderTraits TShaderTraits;
    if ((m_DirtyStates & TShaderTraits::c_ConstantBuffersDirty) == 0)
    {
        return;
    }

    SStageState& CurrentState = TShaderTraits::CurrentStageState(m_CurrentState);
    auto& CBBindings = CurrentState.m_CBs;
    UINT numCBs = m_CurrentState.m_pPSO->GetRootSignature()->m_Desc.GetShaderStage().GetCBBindingCount();
    static const UINT MaxCBs = CBBindings.NumBindings;

    assert(HeapSlot + numCBs <= m_ViewHeap.m_Desc.NumDescriptors);

    for (UINT i = 0; i < numCBs; ++i)
    {
        CBBindings.ResetDirty(i);
        auto pBuffer = CBBindings.GetBound()[i];
        D3D12_CONSTANT_BUFFER_VIEW_DESC CBDesc;
        // Stored offsets/counts are scaled by 16 here - presumably from 16-byte constants to bytes; confirm.
        UINT APIOffset = CurrentState.m_uConstantBufferOffsets[i] * 16;
        UINT APISize = CurrentState.m_uConstantBufferCounts[i] * 16;
        GetBufferViewDesc(pBuffer, CBDesc, APIOffset, APISize);

        D3D12_CPU_DESCRIPTOR_HANDLE Descriptor = m_ViewHeap.CPUHandle(HeapSlot + i);
        m_pDevice12->CreateConstantBufferView(&CBDesc, Descriptor);
    }

    CurrentState.m_CBTableBase = m_ViewHeap.GPUHandle(HeapSlot);

    HeapSlot += numCBs;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Writes one stage's sampler descriptor table into the sampler heap when that stage's c_SamplersDirty bit is set.
// HeapSlot (in/out): first free sampler-heap slot on entry; advanced by the root signature's sampler binding count.
// The GPU handle of the written table is stored in the stage's m_SamplerTableBase for the later Apply step.
// NOTE(review): template parameter list / template arguments appear to be missing from this listing.
template
inline void ImmediateContext::DirtySamplersHelper(UINT& HeapSlot) noexcept
{
    typedef SShaderTraits TShaderTraits;
    if ((m_DirtyStates & TShaderTraits::c_SamplersDirty) == 0)
    {
        return;
    }

    SStageState& CurrentState = TShaderTraits::CurrentStageState(m_CurrentState);
    UINT RootSigHWM = m_CurrentState.m_pPSO->GetRootSignature()->m_Desc.GetShaderStage().GetSamplerBindingCount();
    UINT numSamplers = RootSigHWM;
    auto& SamplerBindings = CurrentState.m_Samplers;
    static const UINT MaxSamplers = SamplerBindings.NumBindings;
    assert(HeapSlot + numSamplers <= m_SamplerHeap.m_Desc.NumDescriptors);

    D3D12_CPU_DESCRIPTOR_HANDLE Descriptors[MaxSamplers];
    SamplerBindings.FillDescriptors(Descriptors, &m_NullSampler, RootSigHWM);

    CurrentState.m_SamplerTableBase = m_SamplerHeap.GPUHandle(HeapSlot);
    D3D12_CPU_DESCRIPTOR_HANDLE SamplerTableBaseCPU = m_SamplerHeap.CPUHandle(HeapSlot);

    m_pDevice12->CopyDescriptors(1, &SamplerTableBaseCPU, &numSamplers,
        numSamplers, Descriptors, nullptr /*sizes*/,
        m_SamplerHeap.m_Desc.Type);

    HeapSlot += numSamplers;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Fills the BufferLocation and SizeInBytes members of a buffer view description.
//   pBuffer   - buffer to describe; a null buffer yields a zeroed location and size.
//   Desc      - description to fill (any type exposing SizeInBytes and BufferLocation).
//   APIOffset - byte offset added to the resource base when computing the location.
//   APISize   - upper bound for the size; the default of -1 converts to the largest UINT, so the size reported by
//               GetDynamicBufferSize is used unchanged.
// A computed size of 0 also forces BufferLocation to 0.
// NOTE(review): TDesc is not declared in the visible text - the template parameter list appears to be missing.
template
inline void GetBufferViewDesc(Resource* pBuffer, TDesc& Desc, UINT APIOffset, UINT APISize = -1)
{
    if (pBuffer)
    {
        Desc.SizeInBytes = min(GetDynamicBufferSize(pBuffer, APIOffset), APISize);
        Desc.BufferLocation = Desc.SizeInBytes == 0 ? 0 :
            // TODO: Cache the GPU VA, frequent calls to this cause a CPU hotspot
            (pBuffer->GetUnderlyingResource()->GetGPUVirtualAddress() // Base of the DX12 resource
             + pBuffer->GetSubresourcePlacement(0).Offset // Base of the DX11 resource after renaming
             + APIOffset); // Offset from the base of the DX11 resource
    }
    else
    {
        Desc.BufferLocation = 0;
        Desc.SizeInBytes = 0;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Selects the command list member function used to bind a descriptor table: the primary template yields
// SetGraphicsRootDescriptorTable, the explicit specialization yields SetComputeRootDescriptorTable.
// NOTE(review): the template parameter and the specialization's argument appear to be missing from this listing.
template
struct DescriptorBindFuncs
{
    static decltype(&ID3D12GraphicsCommandList::SetGraphicsRootDescriptorTable) GetBindFunc()
    {
        return &ID3D12GraphicsCommandList::SetGraphicsRootDescriptorTable;
    }
};
template<>
struct DescriptorBindFuncs
{
    static decltype(&ID3D12GraphicsCommandList::SetComputeRootDescriptorTable) GetBindFunc()
    {
        return &ID3D12GraphicsCommandList::SetComputeRootDescriptorTable;
    }
};

//----------------------------------------------------------------------------------------------------------------------------------
// Root parameter index of the SRV descriptor table, one specialization per case (values 1, 4, 7, 10, 13, 1).
// NOTE(review): the specialization arguments are missing from this listing, so which stage maps to which index
// cannot be read off here - confirm against the upstream file and the root signature layout.
template
struct SRVBindIndices;
template<>
struct SRVBindIndices { static const UINT c_TableIndex = 1; };
template<>
struct SRVBindIndices { static const UINT c_TableIndex = 4; };
template<>
struct SRVBindIndices { static const UINT c_TableIndex = 7; };
template<>
struct SRVBindIndices { static const UINT c_TableIndex = 10; };
template<>
struct SRVBindIndices { static const UINT c_TableIndex = 13; };
template<>
struct SRVBindIndices { static const UINT c_TableIndex = 1; };

// Binds the stage's previously written SRV table (m_SRVTableBase) on the command list when the stage's
// c_ShaderResourcesDirty bit is set in m_StatesToReassert; otherwise does nothing.
template
inline void ImmediateContext::ApplyShaderResourcesHelper() noexcept
{
    typedef SShaderTraits TShaderTraits;
    SStageState& CurrentState = TShaderTraits::CurrentStageState(m_CurrentState);
    if ((m_StatesToReassert & TShaderTraits::c_ShaderResourcesDirty) == 0)
    {
        return;
    }

    (GetGraphicsCommandList()->*DescriptorBindFuncs::GetBindFunc())(
        SRVBindIndices::c_TableIndex,
        CurrentState.m_SRVTableBase);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Root parameter index of the constant buffer descriptor table (values 0, 3, 6, 9, 12, 0).
// NOTE(review): specialization arguments are missing from this listing - see the note on SRVBindIndices.
template
struct CBBindIndices;
template<>
struct CBBindIndices { static const UINT c_TableIndex = 0; };
template<>
struct CBBindIndices { static const UINT c_TableIndex = 3; };
template<>
struct CBBindIndices { static const UINT c_TableIndex = 6; };
template<>
struct CBBindIndices { static const UINT c_TableIndex = 9; };
template<>
struct CBBindIndices { static const UINT c_TableIndex = 12; };
template<>
struct CBBindIndices { static const UINT c_TableIndex = 0; };

// Binds the stage's previously written constant buffer table (m_CBTableBase) when the stage's
// c_ConstantBuffersDirty bit is set in m_StatesToReassert; otherwise does nothing.
template
inline void ImmediateContext::ApplyConstantBuffersHelper() noexcept
{
    typedef SShaderTraits TShaderTraits;
    SStageState& CurrentState = TShaderTraits::CurrentStageState(m_CurrentState);
    if ((m_StatesToReassert & TShaderTraits::c_ConstantBuffersDirty) == 0)
    {
        return;
    }

    (GetGraphicsCommandList()->*DescriptorBindFuncs::GetBindFunc())(
        CBBindIndices::c_TableIndex,
        CurrentState.m_CBTableBase);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Root parameter index of the sampler descriptor table (values 2, 5, 8, 11, 14, 2).
// NOTE(review): specialization arguments are missing from this listing - see the note on SRVBindIndices.
template
struct SamplerBindIndices;
template<>
struct SamplerBindIndices { static const UINT c_TableIndex = 2; };
template<>
struct SamplerBindIndices { static const UINT c_TableIndex = 5; };
template<>
struct SamplerBindIndices { static const UINT c_TableIndex = 8; };
template<>
struct SamplerBindIndices { static const UINT c_TableIndex = 11; };
template<>
struct SamplerBindIndices { static const UINT c_TableIndex = 14; };
template<>
struct SamplerBindIndices { static const UINT c_TableIndex = 2; };

// Binds the stage's previously written sampler table (m_SamplerTableBase) when the stage's c_SamplersDirty bit is
// set in m_StatesToReassert; otherwise does nothing.
template
inline void ImmediateContext::ApplySamplersHelper() noexcept
{
    typedef SShaderTraits TShaderTraits;
    SStageState& CurrentState = TShaderTraits::CurrentStageState(m_CurrentState);
    if ((m_StatesToReassert & TShaderTraits::c_SamplersDirty) == 0)
    {
        return;
    }

    (GetGraphicsCommandList()->*DescriptorBindFuncs::GetBindFunc())(
        SamplerBindIndices::c_TableIndex,
        CurrentState.m_SamplerTableBase);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Records a compute dispatch of x * y * z thread groups.
// Does nothing when the current PSO has no compute shader bytecode. A _com_error thrown while preparing or
// recording is swallowed here, so callers never see it.
inline void TRANSLATION_API ImmediateContext::Dispatch(UINT x, UINT y, UINT z)
{
    // Early out if no compute shader has been set
    if (!m_CurrentState.m_pPSO->GetComputeDesc().CS.pShaderBytecode)
    {
        return;
    }

    try
    {
        PreDispatch(); // throw ( _com_error )
        GetGraphicsCommandList()->Dispatch(x, y, z);
        PostDispatch();
    }
    catch (_com_error) {} // already handled, but can't touch the command list
}

//----------------------------------------------------------------------------------------------------------------------------------
// Records an indirect compute dispatch whose arguments are read from pBuffer.
//   pBuffer - argument buffer; transitioned to D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT before use.
//   offset  - byte offset of the arguments, added to the buffer's dynamic offset.
// Does nothing when the current PSO has no compute shader bytecode. A _com_error thrown while preparing or
// recording is swallowed here; note that the state transition is requested before the try block.
inline void TRANSLATION_API ImmediateContext::DispatchIndirect(Resource* pBuffer, UINT offset)
{
    // Early out if no compute shader has been set
    if (!m_CurrentState.m_pPSO->GetComputeDesc().CS.pShaderBytecode)
    {
        return;
    }

    m_ResourceStateManager.TransitionResource(pBuffer, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);

    try
    {
        EnsureExecuteIndirectResources(); // throw( _com_error )
        PreDispatch(); // throw ( _com_error )

        auto pAPIBuffer = GetUnderlyingResource(pBuffer);

        GetGraphicsCommandList()->ExecuteIndirect(
            m_pDispatchCommandSignature.get(),
            1,
            pAPIBuffer,
            offset + GetDynamicBufferOffset(pBuffer),
            nullptr,
            0
            );
        PostDispatch();
    }
    catch (_com_error) {} // already handled, but can't touch the command list
}

//----------------------------------------------------------------------------------------------------------------------------------
// Brings the compute state on the command list up to date before a dispatch is recorded.
// Mirrors the draw path for the single compute stage: derive dirty bits (root signature change and per-slot
// state), reserve heap slots, insert UAV barriers, write the dirty descriptor tables, apply resource transitions,
// set root signature / PSO / tables, then clear the e_ComputeStateDirty bits from both state masks.
// The graphics command list type is used throughout; compute work is recorded on that list.
// When ComputeOnly() is true no sampler heap slots are reserved, and the sampler root slot is bound to the SRV table.
// Throws _com_error: from ReserveSlotsForBindings (per the inline notes) and explicitly when the PSO's GetForUse
// returns null.
// NOTE(review): explicit template arguments (GetShader / GetShaderStage / the *Helper calls / SamplerBindIndices)
// appear to be missing from this listing - confirm against the upstream file.
inline void ImmediateContext::PreDispatch() noexcept(false)
{
#ifdef USE_PIX
    PIXScopedEvent(GetGraphicsCommandList(), 0ull, L"PreDispatch");
#endif
    PreRender(COMMAND_LIST_TYPE::GRAPHICS);

    if (m_bUseRingBufferDescriptorHeaps)
    {
        // Always dirty the state when using ring buffer heaps because we can't safely reuse tables in that case.
        m_DirtyStates |= EDirtyBits::e_HeapBindingsDirty;
    }

    if (m_DirtyStates & e_ComputeRootSignatureDirty)
    {
        m_StatesToReassert |= e_ComputeBindingsDirty; // All bindings need to be reapplied
        if (m_CurrentState.m_pLastComputeRootSig == nullptr)
        {
            // We don't know, so we have to be conservative.
            m_DirtyStates |= e_ComputeBindingsDirty;
        }
        else if (m_CurrentState.m_pLastComputeRootSig != m_CurrentState.m_pPSO->GetRootSignature())
        {
            RootSignatureDesc const& OldDesc = m_CurrentState.m_pLastComputeRootSig->m_Desc;
            RootSignatureDesc const& NewDesc = m_CurrentState.m_pPSO->GetRootSignature()->m_Desc;
            // Only a binding count that grew relative to the previous root signature dirties that table.
            if (NewDesc.m_ShaderStages[0].GetCBBindingCount() > OldDesc.m_ShaderStages[0].GetCBBindingCount())
            {
                m_DirtyStates |= e_CSConstantBuffersDirty;
            }
            if (NewDesc.m_ShaderStages[0].GetSRVBindingCount() > OldDesc.m_ShaderStages[0].GetSRVBindingCount())
            {
                m_DirtyStates |= e_CSShaderResourcesDirty;
            }
            if (NewDesc.m_ShaderStages[0].GetSamplerBindingCount() > OldDesc.m_ShaderStages[0].GetSamplerBindingCount())
            {
                m_DirtyStates |= e_CSSamplersDirty;
            }
            if (NewDesc.GetUAVBindingCount() > OldDesc.GetUAVBindingCount())
            {
                m_DirtyStates |= e_CSUnorderedAccessViewsDirty;
            }
        }
        m_CurrentState.m_pLastComputeRootSig = m_CurrentState.m_pPSO->GetRootSignature();
    }

    // See PreDraw for comments regarding how dirty bits for bindings are managed
    auto& RootSigDesc = m_CurrentState.m_pPSO->GetRootSignature()->m_Desc;
    auto& shaderStage = RootSigDesc.GetShaderStage();

    // A missing compute shader is treated as having no declarations.
    const TDeclVector EmptyDecls;
    auto pComputeShader = m_CurrentState.m_pPSO->GetShader();
    m_DirtyStates |= m_CurrentState.m_CS.m_SRVs.IsDirty(pComputeShader ? pComputeShader->m_ResourceDecls : EmptyDecls, shaderStage.GetSRVBindingCount(), !!(m_DirtyStates & e_CSShaderResourcesDirty)) ? e_CSShaderResourcesDirty : 0;
    m_DirtyStates |= m_CurrentState.m_CS.m_CBs.IsDirty(shaderStage.GetCBBindingCount()) ? e_CSConstantBuffersDirty : 0;
    m_DirtyStates |= m_CurrentState.m_CS.m_Samplers.IsDirty(shaderStage.GetSamplerBindingCount()) ? e_CSSamplersDirty : 0;
    m_DirtyStates |= m_CurrentState.m_CSUAVs.IsDirty(pComputeShader ? pComputeShader->m_UAVDecls : EmptyDecls, RootSigDesc.GetUAVBindingCount(), !!(m_DirtyStates & e_CSUnorderedAccessViewsDirty)) ? e_CSUnorderedAccessViewsDirty : 0;

    // Now that pipeline dirty bits are set appropriately, check if we need to update the descriptor heap
    UINT ViewHeapSlot = ReserveSlotsForBindings(m_ViewHeap, &ImmediateContext::CalculateViewSlotsForBindings); // throw( _com_error )
    UINT SamplerHeapSlot = 0;
    if (!ComputeOnly())
    {
        SamplerHeapSlot = ReserveSlotsForBindings(m_SamplerHeap, &ImmediateContext::CalculateSamplerSlotsForBindings); // throw( _com_error )
    }

    auto& UAVBindings = m_CurrentState.m_CSUAVs;
    UINT numUAVs = RootSigDesc.GetUAVBindingCount();
    InsertUAVBarriersIfNeeded(UAVBindings, numUAVs);

    // Second pass copies data into the descriptor heap
    if (m_DirtyStates & e_ComputeBindingsDirty)
    {
        DirtyShaderResourcesHelper(ViewHeapSlot);
        DirtyConstantBuffersHelper(ViewHeapSlot);
        if (!ComputeOnly())
        {
            DirtySamplersHelper(SamplerHeapSlot);
        }
        if (m_DirtyStates & e_CSUnorderedAccessViewsDirty)
        {
            auto& UAVTableBase = m_CurrentState.m_CSUAVTableBase;

            static const UINT MaxUAVs = UAVBindings.NumBindings;
            assert(ViewHeapSlot + numUAVs <= m_ViewHeap.m_Desc.NumDescriptors);

            D3D12_CPU_DESCRIPTOR_HANDLE UAVDescriptors[MaxUAVs];
            UAVBindings.FillDescriptors(UAVDescriptors, m_NullUAVs, RootSigDesc.GetUAVBindingCount());

            UAVTableBase = m_ViewHeap.GPUHandle(ViewHeapSlot);
            D3D12_CPU_DESCRIPTOR_HANDLE UAVTableBaseCPU = m_ViewHeap.CPUHandle(ViewHeapSlot);

            m_pDevice12->CopyDescriptors(1, &UAVTableBaseCPU, &numUAVs,
                numUAVs, UAVDescriptors, nullptr /*sizes*/,
                m_ViewHeap.m_Desc.Type);

            ViewHeapSlot += numUAVs;
        }
    }

    m_StatesToReassert |= (m_DirtyStates & e_ComputeStateDirty);

    if (m_StatesToReassert & e_FirstDispatch)
    {
        m_CurrentState.m_CS.m_SRVs.ReassertResourceState();
        m_CurrentState.m_CS.m_CBs.ReassertResourceState();
        m_CurrentState.m_CSUAVs.ReassertResourceState();
    }

    m_ResourceStateManager.ApplyAllResourceTransitions(true);

    if (m_StatesToReassert & e_ComputeStateDirty)
    {
        if (m_StatesToReassert & e_ComputeRootSignatureDirty)
        {
            GetGraphicsCommandList()->SetComputeRootSignature(m_CurrentState.m_pPSO->GetRootSignature()->GetForImmediateUse());
            // Setting a root signature forces every table to be re-bound below.
            m_StatesToReassert |= e_ComputeBindingsDirty;
        }

        if (m_StatesToReassert & e_PipelineStateDirty)
        {
            auto pPSO = m_CurrentState.m_pPSO->GetForUse(COMMAND_LIST_TYPE::GRAPHICS);
            if (!pPSO)
            {
                throw _com_error(S_OK);
            }
            GetGraphicsCommandList()->SetPipelineState(pPSO);
        }

        ApplyShaderResourcesHelper();
        ApplyConstantBuffersHelper();
        if (ComputeOnly())
        {
            if (m_StatesToReassert & e_CSSamplersDirty)
            {
                // For compute-only, we turn our sampler tables into SRVs.
                // Make sure we bind something that's valid to the sampler slot, and just make it mirror the SRVs.
                GetGraphicsCommandList()->SetComputeRootDescriptorTable(SamplerBindIndices::c_TableIndex, m_CurrentState.m_CS.m_SRVTableBase);
            }
        }
        else
        {
            ApplySamplersHelper();
        }
        if (m_StatesToReassert & e_CSUnorderedAccessViewsDirty)
        {
            static const UINT UAVTableIndex = 3;
            auto const& UAVTableBase = m_CurrentState.m_CSUAVTableBase;

            GetGraphicsCommandList()->SetComputeRootDescriptorTable(UAVTableIndex, UAVTableBase);
        }
    }

    m_StatesToReassert &= ~e_ComputeStateDirty;
    m_DirtyStates &= ~e_ComputeStateDirty;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Returns the command queue of the manager for 'type', or nullptr when type is UNKNOWN or no manager exists.
inline ID3D12CommandQueue *ImmediateContext::GetCommandQueue(COMMAND_LIST_TYPE type) noexcept
{
    if (type != COMMAND_LIST_TYPE::UNKNOWN && m_CommandLists[(UINT)type])
    {
        return m_CommandLists[(UINT)type]->GetCommandQueue();
    }
    else
    {
        return nullptr;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Returns the graphics command list. Unlike the sibling accessors this does not check that the graphics manager
// exists before dereferencing it.
inline ID3D12GraphicsCommandList *ImmediateContext::GetGraphicsCommandList() noexcept
{
    return m_CommandLists[(UINT)COMMAND_LIST_TYPE::GRAPHICS]->GetGraphicsCommandList();
}

//----------------------------------------------------------------------------------------------------------------------------------
// Returns the video decode command list, or nullptr when no video decode manager exists.
inline ID3D12VideoDecodeCommandList2 *ImmediateContext::GetVideoDecodeCommandList() noexcept
{
    if (m_CommandLists[(UINT)COMMAND_LIST_TYPE::VIDEO_DECODE])
    {
        return m_CommandLists[(UINT)COMMAND_LIST_TYPE::VIDEO_DECODE]->GetVideoDecodeCommandList();
    }
    else
    {
        return nullptr;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Returns the video process command list, or nullptr when no video process manager exists.
inline ID3D12VideoProcessCommandList2 *ImmediateContext::GetVideoProcessCommandList() noexcept
{
    if (m_CommandLists[(UINT)COMMAND_LIST_TYPE::VIDEO_PROCESS])
    {
        return m_CommandLists[(UINT)COMMAND_LIST_TYPE::VIDEO_PROCESS]->GetVideoProcessCommandList();
    }
    else
    {
        return nullptr;
    }
}

// There is an MSVC bug causing a bogus warning to be emitted here for x64 only, while compiling ApplyAllResourceTransitions
#pragma warning(push)
#pragma warning(disable: 4789)
//----------------------------------------------------------------------------------------------------------------------------------
// Returns the command list manager for 'type' (which may itself be null), or nullptr when type is UNKNOWN.
inline CommandListManager *ImmediateContext::GetCommandListManager(COMMAND_LIST_TYPE type) noexcept
{
    return type != COMMAND_LIST_TYPE::UNKNOWN ? m_CommandLists[(UINT)type].get() : nullptr;
}
#pragma warning(pop)

//----------------------------------------------------------------------------------------------------------------------------------
// Returns the command list of the manager for 'commandListType', or nullptr when the type is UNKNOWN or no
// manager exists.
inline ID3D12CommandList *ImmediateContext::GetCommandList(COMMAND_LIST_TYPE commandListType) noexcept
{
    if (commandListType != COMMAND_LIST_TYPE::UNKNOWN && m_CommandLists[(UINT)commandListType])
    {
        return m_CommandLists[(UINT)commandListType]->GetCommandList();
    }
    else
    {
        return nullptr;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Returns the current command list ID for 'type', or 0 when the type is UNKNOWN or no manager exists.
inline UINT64 ImmediateContext::GetCommandListID(COMMAND_LIST_TYPE type) noexcept
{
    if (type != COMMAND_LIST_TYPE::UNKNOWN && m_CommandLists[(UINT)type])
    {
        return m_CommandLists[(UINT)type]->GetCommandListID();
    }
    else
    {
        return 0;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Same as GetCommandListID but forwards to the manager's GetCommandListIDInterlockedRead; returns 0 when the type
// is UNKNOWN or no manager exists.
inline UINT64 ImmediateContext::GetCommandListIDInterlockedRead(COMMAND_LIST_TYPE type) noexcept
{
    if (type != COMMAND_LIST_TYPE::UNKNOWN && m_CommandLists[(UINT)type])
    {
        return m_CommandLists[(UINT)type]->GetCommandListIDInterlockedRead();
    }
    else
    {
        return 0;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Returns the ID of the most recent command list that has commands (see below), or 0 when the type is UNKNOWN or
// no manager exists.
inline UINT64 ImmediateContext::GetCommandListIDWithCommands(COMMAND_LIST_TYPE type) noexcept
{
    // This method gets the ID of the last command list that actually has commands, which is either
    // the current command list, if it has commands, or the previously submitted command list if the
    // current is empty.
    //
    // The result of this method is the fence id that will be signaled after a flush, and is used so that
    // Async::End can track query completion correctly.
    UINT64 Id = 0;
    if (type != COMMAND_LIST_TYPE::UNKNOWN && m_CommandLists[(UINT)type])
    {
        Id = m_CommandLists[(UINT)type]->GetCommandListID();
        assert(Id);
        if (!m_CommandLists[(UINT)type]->HasCommands() && !m_CommandLists[(UINT)type]->NeedSubmitFence())
        {
            Id -= 1; // Go back one command list
        }
    }
    return Id;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Returns the completed fence value for 'type', or 0 when the type is UNKNOWN or no manager exists.
inline UINT64 ImmediateContext::GetCompletedFenceValue(COMMAND_LIST_TYPE type) noexcept
{
    if (type != COMMAND_LIST_TYPE::UNKNOWN && m_CommandLists[(UINT)type])
    {
        return m_CommandLists[(UINT)type]->GetCompletedFenceValue();
    }
    else
    {
        return 0;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Returns the fence of the manager for 'type', or nullptr when the type is UNKNOWN or no manager exists.
inline Fence *ImmediateContext::GetFence(COMMAND_LIST_TYPE type) noexcept
{
    if (type != COMMAND_LIST_TYPE::UNKNOWN && m_CommandLists[(UINT)type])
    {
        return m_CommandLists[(UINT)type]->GetFence();
    }
    else
    {
        return nullptr;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Closes the command list of every existing manager whose bit (1 << type index) is set in commandListTypeMask.
inline void ImmediateContext::CloseCommandList(UINT commandListTypeMask) noexcept
{
    for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++)
    {
        if ((commandListTypeMask & (1 << i)) && m_CommandLists[i])
        {
            m_CommandLists[i]->CloseCommandList();
        }
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Resets the command list of every existing manager whose bit (1 << type index) is set in commandListTypeMask.
inline void ImmediateContext::ResetCommandList(UINT commandListTypeMask) noexcept
{
    for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++)
    {
        if ((commandListTypeMask & (1 << i)) && m_CommandLists[i])
        {
            m_CommandLists[i]->ResetCommandList();
        }
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Arranges for hEvent to be set once all selected command list types have reached their flush fence value.
// For each existing manager selected by the mask, its fence and the value returned by EnsureFlushedAndFenced are
// collected; the device is then asked to set the event when ALL of those fences complete.
// Returns the HRESULT of SetEventOnMultipleFenceCompletion, the error carried by a _com_error thrown while
// flushing, or E_OUTOFMEMORY on std::bad_alloc.
inline HRESULT ImmediateContext::EnqueueSetEvent(UINT commandListTypeMask, HANDLE hEvent) noexcept
{
#ifdef USE_PIX
    PIXSetMarker(0ull, L"EnqueueSetEvent");
#endif
    HRESULT hr = S_OK;
    ID3D12Fence *pFences[(UINT)COMMAND_LIST_TYPE::MAX_VALID] = {};
    UINT64 FenceValues[(UINT)COMMAND_LIST_TYPE::MAX_VALID] = {};
    UINT nLists = 0;
    for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++)
    {
        if ((commandListTypeMask & (1 << i)) && m_CommandLists[i])
        {
            pFences[nLists] = m_CommandLists[i]->GetFence()->Get();
            try
            {
                FenceValues[nLists] = m_CommandLists[i]->EnsureFlushedAndFenced(); // throws
            }
            catch (_com_error& e)
            {
                return e.Error();
            }
            catch (std::bad_alloc&)
            {
                return E_OUTOFMEMORY;
            }
            ++nLists;
        }
    }
    hr = m_pDevice12_1->SetEventOnMultipleFenceCompletion(
             pFences,
             FenceValues,
             nLists,
             D3D12_MULTIPLE_FENCE_WAIT_FLAG_ALL,
             hEvent);
    return hr;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Single-type overload: forwards to the manager's EnqueueSetEvent, or returns E_UNEXPECTED when the type is
// UNKNOWN or no manager exists.
inline HRESULT ImmediateContext::EnqueueSetEvent(COMMAND_LIST_TYPE commandListType, HANDLE hEvent) noexcept
{
    if (commandListType != COMMAND_LIST_TYPE::UNKNOWN && m_CommandLists[(UINT)commandListType])
    {
        return m_CommandLists[(UINT)commandListType]->EnqueueSetEvent(hEvent);
    }
    else
    {
        return E_UNEXPECTED;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Blocks until every selected command list type has completed its queued work.
// Each existing manager selected by the mask enqueues a set-event on its own event; the function then waits for
// all of those events. Returns false as soon as one EnqueueSetEvent fails, true after the wait.
// NOTE(review): if the mask selects no existing manager, nLists stays 0 and WaitForMultipleObjects is called with
// a count of 0 (the result is only checked by the assert) - confirm callers never pass such a mask.
inline bool ImmediateContext::WaitForCompletion(UINT commandListTypeMask) noexcept
{
    UINT nLists = 0;
    HANDLE hEvents[(UINT)COMMAND_LIST_TYPE::MAX_VALID] = {};
    for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++)
    {
        if ((commandListTypeMask & (1 << i)) && m_CommandLists[i])
        {
            hEvents[nLists] = m_CommandLists[i]->GetEvent();
            if (FAILED(m_CommandLists[i]->EnqueueSetEvent(hEvents[nLists])))
            {
                return false;
            }
            ++nLists;
        }
    }
    DWORD waitRet = WaitForMultipleObjects(nLists, hEvents, TRUE, INFINITE);
    UNREFERENCED_PARAMETER(waitRet);
    assert(waitRet == WAIT_OBJECT_0);
    return true;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Single-type overload: forwards to the manager's WaitForCompletion (which may throw), or returns false when the
// type is UNKNOWN or no manager exists.
inline bool ImmediateContext::WaitForCompletion(COMMAND_LIST_TYPE commandListType)
{
    if (commandListType != COMMAND_LIST_TYPE::UNKNOWN && m_CommandLists[(UINT)commandListType])
    {
        return m_CommandLists[(UINT)commandListType]->WaitForCompletion(); // throws
    }
    else
    {
        return false;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Forwards to the manager's WaitForFenceValue (which may throw), or returns false when the type is UNKNOWN or no
// manager exists.
inline bool ImmediateContext::WaitForFenceValue(COMMAND_LIST_TYPE commandListType, UINT64 FenceValue)
{
    if (commandListType != COMMAND_LIST_TYPE::UNKNOWN && m_CommandLists[(UINT)commandListType])
    {
        return m_CommandLists[(UINT)commandListType]->WaitForFenceValue(FenceValue); // throws
    }
    else
    {
        return false;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Submits the command list of every existing manager whose bit (1 << type index) is set in the mask.
// Submission may throw; managers after a throwing one are not submitted.
inline void ImmediateContext::SubmitCommandList(UINT commandListTypeMask)
{
    for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++)
    {
        if ((commandListTypeMask & (1 << i)) && m_CommandLists[i])
        {
            m_CommandLists[i]->SubmitCommandList(); // throws
        }
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Single-type overload: submits the command list for 'commandListType' if that manager exists; may throw.
inline void ImmediateContext::SubmitCommandList(COMMAND_LIST_TYPE commandListType)
{
    if (commandListType != COMMAND_LIST_TYPE::UNKNOWN && m_CommandLists[(UINT)commandListType])
    {
        m_CommandLists[(UINT)commandListType]->SubmitCommandList(); // throws
    }
}
//---------------------------------------------------------------------------------------------------------------------------------- inline void ImmediateContext::AdditionalCommandsAdded(COMMAND_LIST_TYPE type) noexcept { if (type != COMMAND_LIST_TYPE::UNKNOWN && m_CommandLists[(UINT)type]) { m_CommandLists[(UINT)type]->AdditionalCommandsAdded(); } } inline void ImmediateContext::UploadHeapSpaceAllocated(COMMAND_LIST_TYPE type, UINT64 HeapSize) noexcept { if (type != COMMAND_LIST_TYPE::UNKNOWN && m_CommandLists[(UINT)type]) { m_CommandLists[(UINT)type]->UploadHeapSpaceAllocated(HeapSize); } } //---------------------------------------------------------------------------------------------------------------------------------- inline bool ImmediateContext::HasCommands(COMMAND_LIST_TYPE type) noexcept { if (type != COMMAND_LIST_TYPE::UNKNOWN && m_CommandLists[(UINT)type]) { return m_CommandLists[(UINT)type]->HasCommands(); } else { return false; } } //---------------------------------------------------------------------------------------------------------------------------------- inline UINT ImmediateContext::GetCurrentCommandListTypeMask() noexcept { UINT Mask = 0; for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++) { if (m_CommandLists[i]) { Mask |= (1 << i); } } return Mask; } inline UINT ImmediateContext::GetCommandListTypeMaskForQuery(EQueryType query) noexcept { UINT commandListTypeMask = GetCurrentCommandListTypeMask(); if (query != e_QUERY_EVENT && query != e_QUERY_TIMESTAMP && query != e_QUERY_TIMESTAMPDISJOINT) { commandListTypeMask &= COMMAND_LIST_TYPE_GRAPHICS_MASK; } return commandListTypeMask; } }; ================================================ FILE: include/MaxFrameLatencyHelper.hpp ================================================ #pragma once namespace D3D12TranslationLayer { class MaxFrameLatencyHelper { public: void Init(ImmediateContext* pImmCtx); void SetMaximumFrameLatency(UINT MaxFrameLatency); UINT GetMaximumFrameLatency(); bool 
IsMaximumFrameLatencyReached(); void WaitForMaximumFrameLatency(); void RecordPresentFenceValue(UINT64 fenceValue); private: // Maximum frame latency can be modified or polled from application threads, // while presents are enqueued from a driver worker thread. // We only need to be as good as kernel, so the app thread only needs to reflect // work done by the driver thread, not work that's queued against it. std::recursive_mutex m_FrameLatencyLock; UINT m_MaximumFrameLatency = 3; CircularArray m_PresentFenceValues = {}; // The fence value to wait on decltype(m_PresentFenceValues)::iterator m_PresentFenceValuesBegin = m_PresentFenceValues.begin(); // The fence value to write to decltype(m_PresentFenceValues)::iterator m_PresentFenceValuesEnd = m_PresentFenceValuesBegin; ImmediateContext* m_pImmediateContext; }; } ================================================ FILE: include/PipelineState.hpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once namespace D3D12TranslationLayer { class RootSignature; enum EPipelineType { e_Draw = 0, e_Dispatch }; struct GRAPHICS_PIPELINE_STATE_DESC { Shader* pVertexShader; Shader* pPixelShader; Shader* pGeometryShader; Shader* pDomainShader; Shader* pHullShader; D3D12_STREAM_OUTPUT_DESC StreamOutput; D3D12_BLEND_DESC BlendState; UINT SampleMask; D3D12_RASTERIZER_DESC RasterizerState; D3D12_DEPTH_STENCIL_DESC DepthStencilState; D3D12_INPUT_LAYOUT_DESC InputLayout; D3D12_INDEX_BUFFER_STRIP_CUT_VALUE IBStripCutValue; D3D12_PRIMITIVE_TOPOLOGY_TYPE PrimitiveTopologyType; UINT NumRenderTargets; DXGI_FORMAT RTVFormats[8]; DXGI_FORMAT DSVFormat; DXGI_SAMPLE_DESC SampleDesc; UINT NodeMask; operator D3D12_GRAPHICS_PIPELINE_STATE_DESC() const { D3D12_GRAPHICS_PIPELINE_STATE_DESC Ret = {}; Ret.VS = pVertexShader ? pVertexShader->GetByteCode() : D3D12_SHADER_BYTECODE{}; Ret.PS = pPixelShader ? 
pPixelShader->GetByteCode() : D3D12_SHADER_BYTECODE{}; Ret.GS = pGeometryShader ? pGeometryShader->GetByteCode() : D3D12_SHADER_BYTECODE{}; Ret.DS = pDomainShader ? pDomainShader->GetByteCode() : D3D12_SHADER_BYTECODE{}; Ret.HS = pHullShader ? pHullShader->GetByteCode() : D3D12_SHADER_BYTECODE{}; Ret.StreamOutput = StreamOutput; Ret.InputLayout = InputLayout; Ret.BlendState = BlendState; Ret.DepthStencilState = DepthStencilState; Ret.RasterizerState = RasterizerState; Ret.NumRenderTargets = NumRenderTargets; Ret.SampleDesc = SampleDesc; Ret.SampleMask = SampleMask; memcpy(Ret.RTVFormats, RTVFormats, sizeof(RTVFormats)); Ret.DSVFormat = DSVFormat; Ret.IBStripCutValue = IBStripCutValue; Ret.PrimitiveTopologyType = PrimitiveTopologyType; Ret.NodeMask = NodeMask; return Ret; } }; struct COMPUTE_PIPELINE_STATE_DESC { Shader* pCompute; UINT NodeMask; operator D3D12_COMPUTE_PIPELINE_STATE_DESC() const { D3D12_COMPUTE_PIPELINE_STATE_DESC Ret = {}; Ret.CS = pCompute->GetByteCode(); Ret.NodeMask = NodeMask; return Ret; } }; struct PipelineState : protected DeviceChildImpl { public: EPipelineType GetPipelineStateType() { return m_PipelineStateType; } const D3D12_GRAPHICS_PIPELINE_STATE_DESC &GetGraphicsDesc() { assert(m_PipelineStateType == e_Draw); return Graphics.m_Desc; } const D3D12_COMPUTE_PIPELINE_STATE_DESC &GetComputeDesc() { assert(m_PipelineStateType == e_Dispatch); return Compute.m_Desc; } template SShaderDecls *GetShader() { switch (Shader) { case e_PS: return Graphics.pPixelShader; case e_VS: return Graphics.pVertexShader; case e_GS: return Graphics.pGeometryShader; case e_HS: return Graphics.pHullShader; case e_DS: return Graphics.pDomainShader; case e_CS: return Compute.pComputeShader; default: assert(false); return nullptr; } } RootSignature* GetRootSignature() { return m_pRootSignature; } PipelineState(ImmediateContext *pContext, const GRAPHICS_PIPELINE_STATE_DESC &desc); PipelineState(ImmediateContext *pContext, const COMPUTE_PIPELINE_STATE_DESC &desc); 
~PipelineState(); ID3D12PipelineState* GetForUse(COMMAND_LIST_TYPE CommandListType) { if (m_ThreadpoolWork) { m_ThreadpoolWork.Wait(false); } return DeviceChildImpl::GetForUse(CommandListType); } protected: const EPipelineType m_PipelineStateType; RootSignature* const m_pRootSignature; union { struct Graphics { D3D12_GRAPHICS_PIPELINE_STATE_DESC m_Desc; Shader* pVertexShader; Shader* pPixelShader; Shader* pGeometryShader; Shader* pDomainShader; Shader* pHullShader; } Graphics; struct Compute { D3D12_COMPUTE_PIPELINE_STATE_DESC m_Desc; Shader* pComputeShader; } Compute; }; std::unique_ptr spInputElements; std::unique_ptr spSODecls; UINT SOStrides[D3D12_SO_STREAM_COUNT]; CThreadPoolWork m_ThreadpoolWork; template void Create(); template void CreateImpl(); }; }; ================================================ FILE: include/PrecompiledShaders.h ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #if 0 // // Generated by Microsoft (R) HLSL Shader Compiler 10.0.10011.0 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_VertexID 0 x 0 VERTID uint x // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Position 0 xyzw 0 POS float xyzw // TEXCOORD 0 xy 1 NONE float xy // vs_5_0 dcl_globalFlags refactoringAllowed dcl_immediateConstantBuffer { { -1.000000, 1.000000, 1.000000, 0}, { 1.000000, 1.000000, 1.000000, 0}, { -1.000000, -1.000000, 1.000000, 0}, { 1.000000, -1.000000, 1.000000, 0}, { 0, 0, 0, 0}, { 1.000000, 0, 0, 0}, { 0, 1.000000, 0, 0}, { 1.000000, 1.000000, 0, 0} } dcl_input_sgv v0.x, vertex_id dcl_output_siv o0.xyzw, position dcl_output o1.xy dcl_temps 1 mov o0.zw, l(0,0,1.000000,1.000000) mov r0.x, v0.x mov o0.xy, icb[r0.x + 0].xyxx mov o1.xy, icb[r0.x + 4].xyxx ret // Approximately 5 
instruction slots used #endif const BYTE g_GenMipsVS[] = { 68, 88, 66, 67, 55, 175, 128, 232, 35, 233, 104, 122, 185, 228, 52, 31, 160, 64, 123, 99, 1, 0, 0, 0, 12, 3, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 168, 0, 0, 0, 220, 0, 0, 0, 52, 1, 0, 0, 112, 2, 0, 0, 82, 68, 69, 70, 108, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 0, 0, 0, 0, 5, 254, 255, 0, 1, 0, 0, 60, 0, 0, 0, 82, 68, 49, 49, 60, 0, 0, 0, 24, 0, 0, 0, 32, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 48, 46, 49, 48, 48, 49, 49, 46, 48, 0, 73, 83, 71, 78, 44, 0, 0, 0, 1, 0, 0, 0, 8, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 83, 86, 95, 86, 101, 114, 116, 101, 120, 73, 68, 0, 79, 83, 71, 78, 80, 0, 0, 0, 2, 0, 0, 0, 8, 0, 0, 0, 56, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 3, 12, 0, 0, 83, 86, 95, 80, 111, 115, 105, 116, 105, 111, 110, 0, 84, 69, 88, 67, 79, 79, 82, 68, 0, 171, 171, 171, 83, 72, 69, 88, 52, 1, 0, 0, 80, 0, 1, 0, 77, 0, 0, 0, 106, 8, 0, 1, 53, 24, 0, 0, 34, 0, 0, 0, 0, 0, 128, 191, 0, 0, 128, 63, 0, 0, 128, 63, 0, 0, 0, 0, 0, 0, 128, 63, 0, 0, 128, 63, 0, 0, 128, 63, 0, 0, 0, 0, 0, 0, 128, 191, 0, 0, 128, 191, 0, 0, 128, 63, 0, 0, 0, 0, 0, 0, 128, 63, 0, 0, 128, 191, 0, 0, 128, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 63, 0, 0, 128, 63, 0, 0, 0, 0, 0, 0, 0, 0, 96, 0, 0, 4, 18, 16, 16, 0, 0, 0, 0, 0, 6, 0, 0, 0, 103, 0, 0, 4, 242, 32, 16, 0, 0, 0, 0, 0, 1, 0, 0, 0, 101, 0, 0, 3, 50, 32, 16, 0, 1, 0, 0, 0, 104, 0, 0, 2, 1, 0, 0, 0, 54, 0, 0, 8, 194, 32, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 63, 0, 0, 128, 63, 54, 0, 0, 5, 
18, 0, 16, 0, 0, 0, 0, 0, 10, 16, 16, 0, 0, 0, 0, 0, 54, 0, 0, 6, 50, 32, 16, 0, 0, 0, 0, 0, 70, 144, 144, 0, 10, 0, 16, 0, 0, 0, 0, 0, 54, 0, 0, 7, 50, 32, 16, 0, 1, 0, 0, 0, 70, 144, 208, 0, 4, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, 5, 0, 0, 0, 1, 0, 0, 0, 8, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; #if 0 // // Generated by Microsoft (R) HLSL Shader Compiler 10.0.10011.0 // // // Buffer Definitions: // // cbuffer GenMipsConstants // { // // uint mipLevel; // Offset: 0 Size: 4 // uint slice; // Offset: 4 Size: 4 // float zCoord; // Offset: 8 Size: 4 [unused] // // } // // // Resource Bindings: // // Name Type Format Dim HLSL Bind Count // ------------------------------ ---------- ------- ----------- -------------- ------ // textureSampler sampler NA NA s0 1 // inputTexture texture float4 1darray t0 1 // GenMipsConstants cbuffer NA NA cb0 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Position 0 xyzw 0 POS float // TEXCOORD 0 xy 1 NONE float x // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Target 0 xyzw 0 TARGET float xyzw // ps_5_0 dcl_globalFlags refactoringAllowed dcl_constantbuffer CB0[1], immediateIndexed dcl_sampler s0, mode_default dcl_resource_texture1darray (float,float,float,float) t0 dcl_input_ps linear v1.x dcl_output o0.xyzw dcl_temps 1 utof r0.xy, cb0[0].yxyy mov r0.z, v1.x 
sample_l_indexable(texture1darray)(float,float,float,float) o0.xyzw, r0.zxzz, t0.xyzw, s0, r0.y ret // Approximately 4 instruction slots used #endif const BYTE g_GenMipsPS1D[] = { 68, 88, 66, 67, 11, 206, 31, 232, 41, 28, 253, 201, 237, 22, 132, 103, 73, 129, 24, 173, 1, 0, 0, 0, 32, 4, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 52, 2, 0, 0, 140, 2, 0, 0, 192, 2, 0, 0, 132, 3, 0, 0, 82, 68, 69, 70, 248, 1, 0, 0, 1, 0, 0, 0, 204, 0, 0, 0, 3, 0, 0, 0, 60, 0, 0, 0, 0, 5, 255, 255, 0, 1, 0, 0, 200, 1, 0, 0, 82, 68, 49, 49, 60, 0, 0, 0, 24, 0, 0, 0, 32, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 156, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 171, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 3, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 1, 0, 0, 0, 13, 0, 0, 0, 184, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 116, 101, 120, 116, 117, 114, 101, 83, 97, 109, 112, 108, 101, 114, 0, 105, 110, 112, 117, 116, 84, 101, 120, 116, 117, 114, 101, 0, 71, 101, 110, 77, 105, 112, 115, 67, 111, 110, 115, 116, 97, 110, 116, 115, 0, 171, 171, 171, 184, 0, 0, 0, 3, 0, 0, 0, 228, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92, 1, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 108, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 144, 1, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 108, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 150, 1, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 164, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 109, 105, 112, 76, 101, 118, 101, 108, 0, 100, 119, 111, 114, 100, 0, 171, 0, 0, 19, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 101, 1, 0, 0, 115, 108, 105, 99, 101, 0, 122, 67, 111, 111, 114, 100, 0, 102, 108, 111, 97, 116, 0, 171, 0, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 157, 1, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 48, 46, 49, 48, 48, 49, 49, 46, 48, 0, 73, 83, 71, 78, 80, 0, 0, 0, 2, 0, 0, 0, 8, 0, 0, 0, 56, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 3, 1, 0, 0, 83, 86, 95, 80, 111, 115, 105, 116, 105, 111, 110, 0, 84, 69, 88, 67, 79, 79, 82, 68, 0, 171, 171, 171, 79, 83, 71, 78, 44, 0, 0, 0, 1, 0, 0, 0, 8, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 83, 86, 95, 84, 97, 114, 103, 101, 116, 0, 171, 171, 83, 72, 69, 88, 188, 0, 0, 0, 80, 0, 0, 0, 47, 0, 0, 0, 106, 8, 0, 1, 89, 0, 0, 4, 70, 142, 32, 0, 0, 0, 0, 0, 1, 0, 0, 0, 90, 0, 0, 3, 0, 96, 16, 0, 0, 0, 0, 0, 88, 56, 0, 4, 0, 112, 16, 0, 0, 0, 0, 0, 85, 85, 0, 0, 98, 16, 0, 3, 18, 16, 16, 0, 1, 0, 0, 0, 101, 0, 0, 3, 242, 32, 16, 0, 0, 0, 0, 0, 104, 0, 0, 2, 1, 0, 0, 0, 86, 0, 0, 6, 50, 0, 16, 0, 0, 0, 0, 0, 22, 133, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 5, 66, 0, 16, 0, 0, 0, 0, 0, 10, 16, 16, 0, 1, 0, 0, 0, 72, 0, 0, 141, 194, 1, 0, 128, 67, 85, 21, 0, 242, 32, 16, 0, 0, 0, 0, 0, 38, 10, 16, 0, 0, 0, 0, 0, 70, 126, 16, 0, 0, 0, 0, 0, 0, 96, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; #if 0 // // Generated by Microsoft (R) HLSL Shader Compiler 10.0.10011.0 // // // Buffer Definitions: // // cbuffer GenMipsConstants // { // // uint 
mipLevel; // Offset: 0 Size: 4 // uint slice; // Offset: 4 Size: 4 // float zCoord; // Offset: 8 Size: 4 [unused] // // } // // // Resource Bindings: // // Name Type Format Dim HLSL Bind Count // ------------------------------ ---------- ------- ----------- -------------- ------ // textureSampler sampler NA NA s0 1 // inputTexture texture float4 2darray t0 1 // GenMipsConstants cbuffer NA NA cb0 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Position 0 xyzw 0 POS float // TEXCOORD 0 xy 1 NONE float xy // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Target 0 xyzw 0 TARGET float xyzw // ps_5_0 dcl_globalFlags refactoringAllowed dcl_constantbuffer CB0[1], immediateIndexed dcl_sampler s0, mode_default dcl_resource_texture2darray (float,float,float,float) t0 dcl_input_ps linear v1.xy dcl_output o0.xyzw dcl_temps 1 utof r0.xy, cb0[0].yxyy mov r0.zw, v1.xxxy sample_l_indexable(texture2darray)(float,float,float,float) o0.xyzw, r0.zwxz, t0.xyzw, s0, r0.y ret // Approximately 4 instruction slots used #endif const BYTE g_GenMipsPS2D[] = { 68, 88, 66, 67, 186, 116, 161, 209, 254, 182, 155, 45, 114, 1, 184, 216, 147, 189, 183, 37, 1, 0, 0, 0, 32, 4, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 52, 2, 0, 0, 140, 2, 0, 0, 192, 2, 0, 0, 132, 3, 0, 0, 82, 68, 69, 70, 248, 1, 0, 0, 1, 0, 0, 0, 204, 0, 0, 0, 3, 0, 0, 0, 60, 0, 0, 0, 0, 5, 255, 255, 0, 1, 0, 0, 200, 1, 0, 0, 82, 68, 49, 49, 60, 0, 0, 0, 24, 0, 0, 0, 32, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 156, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 171, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 5, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 1, 0, 0, 0, 13, 0, 0, 0, 184, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 116, 
101, 120, 116, 117, 114, 101, 83, 97, 109, 112, 108, 101, 114, 0, 105, 110, 112, 117, 116, 84, 101, 120, 116, 117, 114, 101, 0, 71, 101, 110, 77, 105, 112, 115, 67, 111, 110, 115, 116, 97, 110, 116, 115, 0, 171, 171, 171, 184, 0, 0, 0, 3, 0, 0, 0, 228, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92, 1, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 108, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 144, 1, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 108, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 150, 1, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 164, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 109, 105, 112, 76, 101, 118, 101, 108, 0, 100, 119, 111, 114, 100, 0, 171, 0, 0, 19, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 101, 1, 0, 0, 115, 108, 105, 99, 101, 0, 122, 67, 111, 111, 114, 100, 0, 102, 108, 111, 97, 116, 0, 171, 0, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 157, 1, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 48, 46, 49, 48, 48, 49, 49, 46, 48, 0, 73, 83, 71, 78, 80, 0, 0, 0, 2, 0, 0, 0, 8, 0, 0, 0, 56, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 3, 3, 0, 0, 83, 86, 95, 80, 111, 115, 105, 116, 105, 111, 110, 0, 84, 69, 88, 67, 79, 79, 82, 68, 0, 171, 171, 171, 79, 83, 71, 78, 44, 0, 0, 0, 1, 0, 0, 0, 8, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 83, 86, 95, 84, 97, 114, 103, 101, 116, 0, 171, 171, 83, 72, 69, 88, 188, 0, 0, 0, 80, 0, 0, 0, 47, 0, 0, 0, 106, 8, 0, 1, 89, 0, 0, 4, 70, 142, 32, 0, 0, 0, 0, 0, 1, 0, 0, 0, 90, 0, 0, 3, 0, 96, 16, 0, 0, 0, 0, 0, 88, 64, 0, 4, 0, 112, 16, 0, 0, 0, 0, 
0, 85, 85, 0, 0, 98, 16, 0, 3, 50, 16, 16, 0, 1, 0, 0, 0, 101, 0, 0, 3, 242, 32, 16, 0, 0, 0, 0, 0, 104, 0, 0, 2, 1, 0, 0, 0, 86, 0, 0, 6, 50, 0, 16, 0, 0, 0, 0, 0, 22, 133, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 5, 194, 0, 16, 0, 0, 0, 0, 0, 6, 20, 16, 0, 1, 0, 0, 0, 72, 0, 0, 141, 2, 2, 0, 128, 67, 85, 21, 0, 242, 32, 16, 0, 0, 0, 0, 0, 230, 8, 16, 0, 0, 0, 0, 0, 70, 126, 16, 0, 0, 0, 0, 0, 0, 96, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; #if 0 // // Generated by Microsoft (R) HLSL Shader Compiler 10.0.10011.0 // // // Buffer Definitions: // // cbuffer GenMipsConstants // { // // uint mipLevel; // Offset: 0 Size: 4 // uint slice; // Offset: 4 Size: 4 [unused] // float zCoord; // Offset: 8 Size: 4 // // } // // // Resource Bindings: // // Name Type Format Dim HLSL Bind Count // ------------------------------ ---------- ------- ----------- -------------- ------ // textureSampler sampler NA NA s0 1 // inputTexture texture float4 3d t0 1 // GenMipsConstants cbuffer NA NA cb0 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Position 0 xyzw 0 POS float // TEXCOORD 0 xy 1 NONE float xy // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Target 0 xyzw 0 TARGET float xyzw // ps_5_0 dcl_globalFlags refactoringAllowed dcl_constantbuffer CB0[1], immediateIndexed dcl_sampler s0, 
mode_default dcl_resource_texture3d (float,float,float,float) t0 dcl_input_ps linear v1.xy dcl_output o0.xyzw dcl_temps 2 utof r0.x, cb0[0].x mov r1.xy, v1.xyxx mov r1.z, cb0[0].z sample_l_indexable(texture3d)(float,float,float,float) o0.xyzw, r1.xyzx, t0.xyzw, s0, r0.x ret // Approximately 5 instruction slots used #endif const BYTE g_GenMipsPS3D[] = { 68, 88, 66, 67, 14, 22, 247, 113, 85, 27, 224, 254, 183, 151, 161, 160, 174, 70, 64, 235, 1, 0, 0, 0, 56, 4, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 52, 2, 0, 0, 140, 2, 0, 0, 192, 2, 0, 0, 156, 3, 0, 0, 82, 68, 69, 70, 248, 1, 0, 0, 1, 0, 0, 0, 204, 0, 0, 0, 3, 0, 0, 0, 60, 0, 0, 0, 0, 5, 255, 255, 0, 1, 0, 0, 200, 1, 0, 0, 82, 68, 49, 49, 60, 0, 0, 0, 24, 0, 0, 0, 32, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 156, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 171, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 8, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 1, 0, 0, 0, 13, 0, 0, 0, 184, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 116, 101, 120, 116, 117, 114, 101, 83, 97, 109, 112, 108, 101, 114, 0, 105, 110, 112, 117, 116, 84, 101, 120, 116, 117, 114, 101, 0, 71, 101, 110, 77, 105, 112, 115, 67, 111, 110, 115, 116, 97, 110, 116, 115, 0, 171, 171, 171, 184, 0, 0, 0, 3, 0, 0, 0, 228, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92, 1, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 108, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 144, 1, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 108, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 150, 1, 0, 0, 8, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 164, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 109, 105, 112, 76, 101, 118, 101, 108, 0, 100, 119, 111, 114, 100, 0, 171, 0, 0, 19, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 101, 1, 0, 0, 115, 
108, 105, 99, 101, 0, 122, 67, 111, 111, 114, 100, 0, 102, 108, 111, 97, 116, 0, 171, 0, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 157, 1, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 48, 46, 49, 48, 48, 49, 49, 46, 48, 0, 73, 83, 71, 78, 80, 0, 0, 0, 2, 0, 0, 0, 8, 0, 0, 0, 56, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 3, 3, 0, 0, 83, 86, 95, 80, 111, 115, 105, 116, 105, 111, 110, 0, 84, 69, 88, 67, 79, 79, 82, 68, 0, 171, 171, 171, 79, 83, 71, 78, 44, 0, 0, 0, 1, 0, 0, 0, 8, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 83, 86, 95, 84, 97, 114, 103, 101, 116, 0, 171, 171, 83, 72, 69, 88, 212, 0, 0, 0, 80, 0, 0, 0, 53, 0, 0, 0, 106, 8, 0, 1, 89, 0, 0, 4, 70, 142, 32, 0, 0, 0, 0, 0, 1, 0, 0, 0, 90, 0, 0, 3, 0, 96, 16, 0, 0, 0, 0, 0, 88, 40, 0, 4, 0, 112, 16, 0, 0, 0, 0, 0, 85, 85, 0, 0, 98, 16, 0, 3, 50, 16, 16, 0, 1, 0, 0, 0, 101, 0, 0, 3, 242, 32, 16, 0, 0, 0, 0, 0, 104, 0, 0, 2, 2, 0, 0, 0, 86, 0, 0, 6, 18, 0, 16, 0, 0, 0, 0, 0, 10, 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 5, 50, 0, 16, 0, 1, 0, 0, 0, 70, 16, 16, 0, 1, 0, 0, 0, 54, 0, 0, 6, 66, 0, 16, 0, 1, 0, 0, 0, 42, 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72, 0, 0, 141, 66, 1, 0, 128, 67, 85, 21, 0, 242, 32, 16, 0, 0, 0, 0, 0, 70, 2, 16, 0, 1, 0, 0, 0, 70, 126, 16, 0, 0, 0, 0, 0, 0, 96, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; #if 0 // // Generated by Microsoft (R) HLSL Shader Compiler 10.0.10011.0 // // // Buffer Definitions: // // cbuffer Constants // { // // uint VBOffset; // Offset: 0 Size: 4 // uint VBStride; // Offset: 4 Size: 4 // // } // // // Resource Bindings: // // Name Type Format Dim HLSL Bind Count // ------------------------------ ---------- ------- ----------- -------------- ------ // SuffixBuffer UAV uint buf u0 1 // Constants cbuffer NA NA cb0 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // no Input // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // no Output cs_5_0 dcl_globalFlags refactoringAllowed dcl_constantbuffer CB0[1], immediateIndexed dcl_uav_typed_buffer (uint,uint,uint,uint) u0 dcl_temps 1 dcl_thread_group 1, 1, 1 ld_uav_typed_indexable(buffer)(uint,uint,uint,uint) r0.x, l(0, 0, 0, 0), u0.xyzw ult r0.y, cb0[0].x, r0.x iadd r0.x, r0.x, -cb0[0].x and r0.x, r0.x, r0.y udiv r0.x, null, r0.x, cb0[0].y ult r0.y, l(0), cb0[0].y and r0.x, r0.x, r0.y store_uav_typed u0.xyzw, l(1,1,1,1), r0.xxxx store_uav_typed u0.xyzw, l(2,2,2,2), l(1,1,1,1) store_uav_typed u0.xyzw, l(3,3,3,3), l(0,0,0,0) store_uav_typed u0.xyzw, l(4,4,4,4), l(0,0,0,0) ret // Approximately 12 instruction slots used #endif const BYTE g_DrawAutoCS[] = { 68, 88, 66, 67, 30, 107, 129, 74, 200, 126, 219, 212, 32, 244, 184, 25, 118, 176, 115, 117, 1, 0, 0, 0, 104, 4, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 168, 1, 0, 0, 184, 1, 0, 0, 200, 1, 0, 0, 204, 3, 0, 0, 82, 68, 69, 70, 108, 1, 0, 0, 1, 0, 0, 0, 148, 0, 0, 0, 2, 0, 0, 0, 60, 0, 0, 0, 0, 5, 83, 67, 0, 1, 0, 0, 57, 1, 0, 0, 82, 68, 49, 49, 60, 0, 0, 0, 24, 0, 0, 0, 32, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 124, 0, 0, 0, 4, 0, 0, 0, 
4, 0, 0, 0, 1, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 137, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 83, 117, 102, 102, 105, 120, 66, 117, 102, 102, 101, 114, 0, 67, 111, 110, 115, 116, 97, 110, 116, 115, 0, 171, 137, 0, 0, 0, 2, 0, 0, 0, 172, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 252, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 12, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 48, 1, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 12, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 86, 66, 79, 102, 102, 115, 101, 116, 0, 100, 119, 111, 114, 100, 0, 171, 0, 0, 19, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 1, 0, 0, 86, 66, 83, 116, 114, 105, 100, 101, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 48, 46, 49, 48, 48, 49, 49, 46, 48, 0, 171, 171, 171, 73, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 79, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, 252, 1, 0, 0, 80, 0, 5, 0, 127, 0, 0, 0, 106, 8, 0, 1, 89, 0, 0, 4, 70, 142, 32, 0, 0, 0, 0, 0, 1, 0, 0, 0, 156, 8, 0, 4, 0, 224, 17, 0, 0, 0, 0, 0, 68, 68, 0, 0, 104, 0, 0, 2, 1, 0, 0, 0, 155, 0, 0, 4, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 163, 0, 0, 140, 66, 0, 0, 128, 3, 17, 17, 0, 18, 0, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 70, 238, 17, 0, 0, 0, 0, 0, 79, 0, 0, 8, 34, 0, 16, 0, 0, 0, 0, 0, 10, 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 30, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 10, 128, 32, 128, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 7, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 78, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 0, 208, 0, 0, 10, 0, 16, 0, 0, 0, 0, 
0, 26, 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 79, 0, 0, 8, 34, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 0, 26, 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 7, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 164, 0, 0, 10, 242, 224, 17, 0, 0, 0, 0, 0, 2, 64, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 6, 0, 16, 0, 0, 0, 0, 0, 164, 0, 0, 13, 242, 224, 17, 0, 0, 0, 0, 0, 2, 64, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 64, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 164, 0, 0, 13, 242, 224, 17, 0, 0, 0, 0, 0, 2, 64, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 164, 0, 0, 13, 242, 224, 17, 0, 0, 0, 0, 0, 2, 64, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, 12, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0 }; #if 0 // // Generated by Microsoft (R) HLSL Shader Compiler 10.0.10011.0 // // // Buffer Definitions: // // cbuffer Constants // { // // uint NumStreams; // Offset: 0 Size: 4 // uint Unused; // Offset: 4 Size: 4 [unused] // // } // // // Resource Bindings: // // Name Type Format Dim HLSL Bind Count // ------------------------------ ---------- ------- ----------- -------------- ------ // QueryBuffer UAV uint buf u0 1 // Constants cbuffer NA NA cb0 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // no Input // // Output signature: // 
// Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // no Output cs_5_0 dcl_globalFlags refactoringAllowed dcl_constantbuffer CB0[1], immediateIndexed dcl_uav_typed_buffer (uint,uint,uint,uint) u0 dcl_temps 2 dcl_thread_group 1, 1, 1 mov r0.xy, l(0,0,0,0) loop uge r0.z, r0.y, cb0[0].x breakc_nz r0.z ishl r0.z, r0.y, l(2) ld_uav_typed_indexable(buffer)(uint,uint,uint,uint) r0.z, r0.zzzz, u0.yzxw bfi r1.xyz, l(30, 31, 31, 0), l(2, 1, 2, 0), r0.yyyy, l(1, 1, 2, 0) ld_uav_typed_indexable(buffer)(uint,uint,uint,uint) r0.w, r1.xxxx, u0.yzwx ld_uav_typed_indexable(buffer)(uint,uint,uint,uint) r1.x, r1.zzzz, u0.xyzw bfi r1.y, l(31), l(1), r1.y, l(3) ld_uav_typed_indexable(buffer)(uint,uint,uint,uint) r1.y, r1.yyyy, u0.yxzw ine r0.zw, r0.zzzw, r1.xxxy or r0.z, r0.w, r0.z movc r0.x, r0.z, l(1), r0.x iadd r0.y, r0.y, l(1) endloop store_uav_typed u0.xyzw, l(0,0,0,0), r0.xxxx store_uav_typed u0.xyzw, l(1,1,1,1), l(0,0,0,0) ret // Approximately 19 instruction slots used #endif const BYTE g_FormatQueryCS[] = { 68, 88, 66, 67, 4, 38, 171, 58, 85, 201, 114, 23, 18, 106, 77, 9, 128, 185, 88, 115, 1, 0, 0, 0, 4, 5, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 168, 1, 0, 0, 184, 1, 0, 0, 200, 1, 0, 0, 104, 4, 0, 0, 82, 68, 69, 70, 108, 1, 0, 0, 1, 0, 0, 0, 148, 0, 0, 0, 2, 0, 0, 0, 60, 0, 0, 0, 0, 5, 83, 67, 0, 1, 0, 0, 59, 1, 0, 0, 82, 68, 49, 49, 60, 0, 0, 0, 24, 0, 0, 0, 32, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 124, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 81, 117, 101, 114, 121, 66, 117, 102, 102, 101, 114, 0, 67, 111, 110, 115, 116, 97, 110, 116, 115, 0, 171, 171, 136, 0, 0, 0, 2, 0, 0, 0, 172, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 252, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 16, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 
255, 255, 255, 0, 0, 0, 0, 52, 1, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 16, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 78, 117, 109, 83, 116, 114, 101, 97, 109, 115, 0, 100, 119, 111, 114, 100, 0, 171, 171, 171, 0, 0, 19, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 1, 0, 0, 85, 110, 117, 115, 101, 100, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 48, 46, 49, 48, 48, 49, 49, 46, 48, 0, 171, 73, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 79, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, 152, 2, 0, 0, 80, 0, 5, 0, 166, 0, 0, 0, 106, 8, 0, 1, 89, 0, 0, 4, 70, 142, 32, 0, 0, 0, 0, 0, 1, 0, 0, 0, 156, 8, 0, 4, 0, 224, 17, 0, 0, 0, 0, 0, 68, 68, 0, 0, 104, 0, 0, 2, 2, 0, 0, 0, 155, 0, 0, 4, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 54, 0, 0, 8, 50, 0, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 1, 80, 0, 0, 8, 66, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 10, 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 3, 42, 0, 16, 0, 0, 0, 0, 0, 41, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 2, 0, 0, 0, 163, 0, 0, 137, 66, 0, 0, 128, 3, 17, 17, 0, 66, 0, 16, 0, 0, 0, 0, 0, 166, 10, 16, 0, 0, 0, 0, 0, 150, 236, 17, 0, 0, 0, 0, 0, 140, 0, 0, 20, 114, 0, 16, 0, 1, 0, 0, 0, 2, 64, 0, 0, 30, 0, 0, 0, 31, 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, 0, 2, 64, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 86, 5, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 163, 0, 0, 137, 66, 0, 0, 128, 3, 17, 17, 0, 130, 0, 16, 0, 0, 0, 0, 0, 6, 0, 16, 0, 1, 0, 0, 0, 150, 227, 17, 0, 0, 0, 0, 0, 163, 0, 0, 137, 66, 0, 0, 128, 3, 17, 17, 0, 18, 0, 16, 0, 1, 0, 0, 0, 166, 10, 16, 0, 1, 0, 0, 0, 70, 238, 17, 0, 0, 0, 0, 0, 140, 0, 0, 11, 34, 0, 16, 0, 1, 0, 0, 0, 1, 
64, 0, 0, 31, 0, 0, 0, 1, 64, 0, 0, 1, 0, 0, 0, 26, 0, 16, 0, 1, 0, 0, 0, 1, 64, 0, 0, 3, 0, 0, 0, 163, 0, 0, 137, 66, 0, 0, 128, 3, 17, 17, 0, 34, 0, 16, 0, 1, 0, 0, 0, 86, 5, 16, 0, 1, 0, 0, 0, 22, 238, 17, 0, 0, 0, 0, 0, 39, 0, 0, 7, 194, 0, 16, 0, 0, 0, 0, 0, 166, 14, 16, 0, 0, 0, 0, 0, 6, 4, 16, 0, 1, 0, 0, 0, 60, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 58, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 55, 0, 0, 9, 18, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 1, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 30, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 1, 0, 0, 0, 22, 0, 0, 1, 164, 0, 0, 10, 242, 224, 17, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 16, 0, 0, 0, 0, 0, 164, 0, 0, 13, 242, 224, 17, 0, 0, 0, 0, 0, 2, 64, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, 19, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0 }; #if 0 // // Generated by Microsoft (R) HLSL Shader Compiler 10.0.10011.0 // // // Buffer Definitions: // // cbuffer Constants // { // // uint NumInstances; // Offset: 0 Size: 4 // uint NumValuesPerInstance; // Offset: 4 Size: 4 // // } // // // Resource Bindings: // // Name Type Format Dim HLSL Bind Count // ------------------------------ ---------- ------- ----------- -------------- ------ // QueryBuffer UAV uint buf u0 1 // Constants cbuffer NA NA cb0 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- 
-------- ------- ------ // no Input // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // no Output cs_5_0 dcl_globalFlags refactoringAllowed dcl_constantbuffer CB0[1], immediateIndexed dcl_uav_typed_buffer (uint,uint,uint,uint) u0 dcl_temps 4 dcl_thread_group 1, 1, 1 mov r0.x, l(1) loop uge r0.y, r0.x, cb0[0].x breakc_nz r0.y mov r0.y, l(0) loop uge r0.z, r0.y, cb0[0].y breakc_nz r0.z ishl r0.z, r0.y, l(1) ld_uav_typed_indexable(buffer)(uint,uint,uint,uint) r0.w, r0.zzzz, u0.yzwx bfi r1.x, l(31), l(1), r0.y, l(1) ld_uav_typed_indexable(buffer)(uint,uint,uint,uint) r1.y, r1.xxxx, u0.yxzw imad r1.z, r0.x, cb0[0].y, r0.y ishl r1.w, r1.z, l(1) ld_uav_typed_indexable(buffer)(uint,uint,uint,uint) r1.w, r1.wwww, u0.yzwx bfi r1.z, l(31), l(1), r1.z, l(1) ld_uav_typed_indexable(buffer)(uint,uint,uint,uint) r1.z, r1.zzzz, u0.yzxw uaddc r2.x, r3.x, r0.w, r1.w iadd r0.w, r1.z, r1.y iadd r0.w, r3.x, r0.w store_uav_typed u0.xyzw, r0.zzzz, r2.xxxx store_uav_typed u0.xyzw, r1.xxxx, r0.wwww iadd r0.y, r0.y, l(1) endloop iadd r0.x, r0.x, l(1) endloop ret // Approximately 27 instruction slots used #endif const BYTE g_AccumulateQueryCS[] = { 68, 88, 66, 67, 174, 83, 119, 153, 220, 81, 44, 182, 89, 139, 173, 242, 110, 155, 230, 182, 1, 0, 0, 0, 104, 5, 0, 0, 5, 0, 0, 0, 52, 0, 0, 0, 184, 1, 0, 0, 200, 1, 0, 0, 216, 1, 0, 0, 204, 4, 0, 0, 82, 68, 69, 70, 124, 1, 0, 0, 1, 0, 0, 0, 148, 0, 0, 0, 2, 0, 0, 0, 60, 0, 0, 0, 0, 5, 83, 67, 0, 1, 0, 0, 73, 1, 0, 0, 82, 68, 49, 49, 60, 0, 0, 0, 24, 0, 0, 0, 32, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 124, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 136, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 81, 117, 101, 114, 121, 66, 117, 102, 102, 101, 114, 0, 67, 111, 110, 115, 116, 97, 110, 116, 115, 0, 171, 171, 136, 0, 0, 0, 2, 0, 0, 
0, 172, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 252, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 16, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 52, 1, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 16, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 78, 117, 109, 73, 110, 115, 116, 97, 110, 99, 101, 115, 0, 100, 119, 111, 114, 100, 0, 171, 0, 0, 19, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 1, 0, 0, 78, 117, 109, 86, 97, 108, 117, 101, 115, 80, 101, 114, 73, 110, 115, 116, 97, 110, 99, 101, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 48, 46, 49, 48, 48, 49, 49, 46, 48, 0, 171, 171, 171, 73, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 79, 83, 71, 78, 8, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 83, 72, 69, 88, 236, 2, 0, 0, 80, 0, 5, 0, 187, 0, 0, 0, 106, 8, 0, 1, 89, 0, 0, 4, 70, 142, 32, 0, 0, 0, 0, 0, 1, 0, 0, 0, 156, 8, 0, 4, 0, 224, 17, 0, 0, 0, 0, 0, 68, 68, 0, 0, 104, 0, 0, 2, 4, 0, 0, 0, 155, 0, 0, 4, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 54, 0, 0, 5, 18, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 1, 0, 0, 0, 48, 0, 0, 1, 80, 0, 0, 8, 34, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 10, 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 3, 26, 0, 16, 0, 0, 0, 0, 0, 54, 0, 0, 5, 34, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 0, 48, 0, 0, 1, 80, 0, 0, 8, 66, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 26, 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 3, 42, 0, 16, 0, 0, 0, 0, 0, 41, 0, 0, 7, 66, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 1, 0, 0, 0, 163, 0, 0, 137, 66, 0, 0, 128, 3, 17, 17, 0, 130, 0, 16, 0, 0, 0, 0, 0, 166, 10, 16, 0, 0, 0, 0, 0, 150, 227, 17, 0, 0, 0, 0, 0, 140, 0, 0, 11, 18, 0, 16, 0, 1, 0, 0, 0, 1, 64, 0, 0, 31, 0, 0, 0, 1, 64, 0, 0, 1, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 
0, 1, 0, 0, 0, 163, 0, 0, 137, 66, 0, 0, 128, 3, 17, 17, 0, 34, 0, 16, 0, 1, 0, 0, 0, 6, 0, 16, 0, 1, 0, 0, 0, 22, 238, 17, 0, 0, 0, 0, 0, 35, 0, 0, 10, 66, 0, 16, 0, 1, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 26, 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 41, 0, 0, 7, 130, 0, 16, 0, 1, 0, 0, 0, 42, 0, 16, 0, 1, 0, 0, 0, 1, 64, 0, 0, 1, 0, 0, 0, 163, 0, 0, 137, 66, 0, 0, 128, 3, 17, 17, 0, 130, 0, 16, 0, 1, 0, 0, 0, 246, 15, 16, 0, 1, 0, 0, 0, 150, 227, 17, 0, 0, 0, 0, 0, 140, 0, 0, 11, 66, 0, 16, 0, 1, 0, 0, 0, 1, 64, 0, 0, 31, 0, 0, 0, 1, 64, 0, 0, 1, 0, 0, 0, 42, 0, 16, 0, 1, 0, 0, 0, 1, 64, 0, 0, 1, 0, 0, 0, 163, 0, 0, 137, 66, 0, 0, 128, 3, 17, 17, 0, 66, 0, 16, 0, 1, 0, 0, 0, 166, 10, 16, 0, 1, 0, 0, 0, 150, 236, 17, 0, 0, 0, 0, 0, 132, 0, 0, 9, 18, 0, 16, 0, 2, 0, 0, 0, 18, 0, 16, 0, 3, 0, 0, 0, 58, 0, 16, 0, 0, 0, 0, 0, 58, 0, 16, 0, 1, 0, 0, 0, 30, 0, 0, 7, 130, 0, 16, 0, 0, 0, 0, 0, 42, 0, 16, 0, 1, 0, 0, 0, 26, 0, 16, 0, 1, 0, 0, 0, 30, 0, 0, 7, 130, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 3, 0, 0, 0, 58, 0, 16, 0, 0, 0, 0, 0, 164, 0, 0, 7, 242, 224, 17, 0, 0, 0, 0, 0, 166, 10, 16, 0, 0, 0, 0, 0, 6, 0, 16, 0, 2, 0, 0, 0, 164, 0, 0, 7, 242, 224, 17, 0, 0, 0, 0, 0, 6, 0, 16, 0, 1, 0, 0, 0, 246, 15, 16, 0, 0, 0, 0, 0, 30, 0, 0, 7, 34, 0, 16, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 1, 0, 0, 0, 22, 0, 0, 1, 30, 0, 0, 7, 18, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 1, 0, 0, 0, 22, 0, 0, 1, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, 27, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0 }; 
================================================
FILE: include/Query.hpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once

namespace D3D12TranslationLayer
{
// Stores the parameters of a resource state transition: the command list, the resource,
// the subresource and the before/after states. Init() and the destructor are defined
// out of line.
// NOTE(review): presumably Init() records the Before->After barrier and the destructor
// records the reverse one when Init() was called - confirm against the implementation.
// NOTE(review): the type is implicitly copyable; if the destructor does record a barrier,
// a copy would record it twice - confirm that no caller copies these objects.
class ConditionalAutoTransition
{
public:
    // Starts with no command list and no resource. The remaining members are
    // value-initialized so that none of them is ever read with an indeterminate value.
    ConditionalAutoTransition()
        : m_pCommandList(nullptr)
        , m_pResource(nullptr)
        , m_Subresource(0)
        , m_Before()
        , m_After()
    {
    }

    void Init(ID3D12GraphicsCommandList* pCommandList, ID3D12Resource* pResource, UINT Subresource, D3D12_RESOURCE_STATES Before, D3D12_RESOURCE_STATES After);
    ~ConditionalAutoTransition();

private:
    ID3D12GraphicsCommandList* m_pCommandList;
    ID3D12Resource* m_pResource;
    UINT m_Subresource;
    D3D12_RESOURCE_STATES m_Before;
    D3D12_RESOURCE_STATES m_After;
};

// A ConditionalAutoTransition that is always initialized: the constructor forwards all of
// its arguments to Init().
class AutoTransition : public ConditionalAutoTransition
{
public:
    AutoTransition(ID3D12GraphicsCommandList* pCommandList, ID3D12Resource* pResource, UINT Subresource, D3D12_RESOURCE_STATES Before, D3D12_RESOURCE_STATES After)
    {
        Init(pCommandList, pResource, Subresource, Before, After);
    }
};

//==================================================================================================================================
// Async
// Stores data responsible for remapping D3D11 async (e.g. queries) to underlying D3D12 async
//==================================================================================================================================
// Query kinds start at 0; counter kinds start at 0x1000 (e_COUNTER_GPU_IDLE).
enum EQueryType
{
    e_QUERY_EVENT = 0,
    e_QUERY_OCCLUSION,
    e_QUERY_TIMESTAMP,
    e_QUERY_TIMESTAMPDISJOINT,
    e_QUERY_PIPELINESTATS,
    e_QUERY_OCCLUSIONPREDICATE,
    e_QUERY_STREAMOUTPUTSTATS,
    e_QUERY_STREAMOVERFLOWPREDICATE,
    e_QUERY_STREAMOUTPUTSTATS_STREAM0,
    e_QUERY_STREAMOUTPUTSTATS_STREAM1,
    e_QUERY_STREAMOUTPUTSTATS_STREAM2,
    e_QUERY_STREAMOUTPUTSTATS_STREAM3,
    e_QUERY_STREAMOVERFLOWPREDICATE_STREAM0,
    e_QUERY_STREAMOVERFLOWPREDICATE_STREAM1,
    e_QUERY_STREAMOVERFLOWPREDICATE_STREAM2,
    e_QUERY_STREAMOVERFLOWPREDICATE_STREAM3,
    e_QUERY_VIDEO_DECODE_STATISTICS,
    e_COUNTER_GPU_IDLE = 0x1000, // Start of "counters"
    e_COUNTER_VERTEX_PROCESSING,
    e_COUNTER_GEOMETRY_PROCESSING,
    e_COUNTER_PIXEL_PROCESSING,
    e_COUNTER_OTHER_GPU_PROCESSING,
    e_COUNTER_HOST_ADAPTER_BANDWIDTH_UTILIZATION,
    e_COUNTER_LOCAL_VIDMEM_BANDWIDTH_UTILIZATION,
    e_COUNTER_VERTEX_THROUGHPUT_UTILIZATION,
    e_COUNTER_TRISETUP_THROUGHPUT_UTILIZATION,
    e_COUNTER_FILLRATE_THROUGHPUT_UTILIZATION,
    e_COUNTER_VERTEXSHADER_MEMORY_LIMITED,
    e_COUNTER_VERTEXSHADER_COMPUTATION_LIMITED,
    e_COUNTER_GEOMETRYSHADER_MEMORY_LIMITED,
    e_COUNTER_GEOMETRYSHADER_COMPUTATION_LIMITED,
    e_COUNTER_PIXELSHADER_MEMORY_LIMITED,
    e_COUNTER_PIXELSHADER_COMPUTATION_LIMITED,
    e_COUNTER_POST_TRANSFORM_CACHE_HIT_RATE,
    e_COUNTER_TEXTURE_CACHE_HIT_RATE,
};

// Abstract base of all asynchronous objects. The non-virtual entry points (Begin, End,
// GetData, FlushAndPrep) are defined out of line; derived classes supply the *Internal
// hooks plus Initialize/Suspend/Resume.
class Async : public DeviceChild
{
public:
    enum class AsyncState { Begun, Ended };

public:
    Async(ImmediateContext* pDevice, EQueryType Type, UINT CommandListTypeMask) noexcept;
    virtual ~Async() noexcept;

    // Initialize is the only hook that is allowed to throw.
    virtual void Initialize() noexcept(false) = 0;
    virtual void Suspend() noexcept = 0;
    virtual void Resume() noexcept = 0;

    void Begin() noexcept;
    void End() noexcept;
    bool GetData(void* pData, UINT DataSize, bool DoNotFlush, bool AsyncGetData) noexcept;
    bool FlushAndPrep(bool DoNotFlush) noexcept;

    static bool RequiresBegin(EQueryType type) noexcept;
    bool RequiresBegin() const noexcept;

protected:
    virtual void BeginInternal(bool restart) noexcept = 0;
    virtual void EndInternal() noexcept = 0;
    virtual void GetDataInternal(_Out_writes_bytes_(DataSize) void* pData, UINT DataSize) noexcept = 0;

public:
    LIST_ENTRY m_ActiveQueryListEntry;
    EQueryType m_Type;
    AsyncState m_CurrentState;
    // One entry per command list type.
    UINT64 m_EndedCommandListID[(UINT)COMMAND_LIST_TYPE::MAX_VALID];
    UINT m_CommandListTypeMask;
};

// Async object that owns, per command list type, a query heap, a result buffer and a
// predication buffer.
class Query : public Async
{
public:
    // nInstances defaults to c_DefaultInstancesPerQuery. Accumulation is enabled for every
    // query type except e_QUERY_VIDEO_DECODE_STATISTICS.
    // The mem-initializers are listed in member declaration order, which is the order in
    // which they actually run.
    Query(ImmediateContext* pDevice, EQueryType Type, UINT CommandListTypeMask, UINT nInstances = c_DefaultInstancesPerQuery) noexcept
        : Async(pDevice, Type, CommandListTypeMask)
        , m_CurrentInstance(0)
        , m_Accumulate(Type != e_QUERY_VIDEO_DECODE_STATISTICS)
        , m_InstancesPerQuery(nInstances)
    {
    }
    ~Query() override;

    void Initialize() noexcept(false) override;
    void Suspend() noexcept override;
    void Resume() noexcept override;

    void FillPredicationBuffer();
    // Returns the predication buffer of the graphics command list type (non-owning).
    ID3D12Resource *GetPredicationBuffer() { return m_spPredicationBuffer[(UINT)COMMAND_LIST_TYPE::GRAPHICS].get(); }
    void GetInstanceData(void* pData, UINT DataSize, UINT InstanceIndex) noexcept;
    UINT GetCurrentInstance() { return m_CurrentInstance; }

protected:
    void BeginInternal(bool restart) noexcept override;
    void EndInternal() noexcept override;
    void GetDataInternal(_Out_writes_bytes_(DataSize) void* pData, UINT DataSize) noexcept override;

    D3D12_QUERY_TYPE GetType12() const;
    D3D12_QUERY_HEAP_TYPE GetHeapType12() const;
    UINT GetNumSubQueries() const;
    UINT GetDataSize12() const;
    void AdvanceInstance();
    UINT QueryIndex(UINT Instance, UINT SubQuery, UINT NumSubQueries);

    static const UINT c_DefaultInstancesPerQuery = 4;

protected:
    // NOTE(review): the template arguments of the two unique_comptr members were missing in
    // the reviewed text. ID3D12Resource is fixed by GetPredicationBuffer()'s return type;
    // ID3D12QueryHeap is inferred from the member's purpose - confirm.
    unique_comptr<ID3D12QueryHeap> m_spQueryHeap[(UINT)COMMAND_LIST_TYPE::MAX_VALID];
    D3D12ResourceSuballocation m_spResultBuffer[(UINT)COMMAND_LIST_TYPE::MAX_VALID];
    unique_comptr<ID3D12Resource> m_spPredicationBuffer[(UINT)COMMAND_LIST_TYPE::MAX_VALID];
    UINT m_CurrentInstance;
    const bool m_Accumulate;
    const UINT m_InstancesPerQuery;
};

// Async object of type e_QUERY_EVENT.
class EventQuery : public Async
{
public:
    EventQuery(ImmediateContext* pDevice, UINT CommandListTypeMask) noexcept
        : Async(pDevice, e_QUERY_EVENT, CommandListTypeMask)
    {
    }

    void Initialize() noexcept(false) override;
    void Suspend() noexcept override;
    void Resume() noexcept override;

protected:
    void BeginInternal(bool restart) noexcept override;
    void EndInternal() noexcept override;
    void GetDataInternal(_Out_writes_bytes_(DataSize) void* pData, UINT DataSize) noexcept override;
};

// NOTE(review): presumably the payload written by TimestampDisjointQuery's GetDataInternal -
// confirm against the implementation.
struct QUERY_DATA_TIMESTAMP_DISJOINT
{
    UINT64 Frequency;
    BOOL Disjoint;
};

// Async object of type e_QUERY_TIMESTAMPDISJOINT.
class TimestampDisjointQuery : public Async
{
public:
    TimestampDisjointQuery(ImmediateContext* pDevice, UINT CommandListTypeMask) noexcept
        : Async(pDevice, e_QUERY_TIMESTAMPDISJOINT, CommandListTypeMask)
    {
    }

    void Initialize() noexcept(false) override;
    void Suspend() noexcept override;
    void Resume() noexcept override;

protected:
    void BeginInternal(bool restart) noexcept override;
    void EndInternal() noexcept override;
    void GetDataInternal(_Out_writes_bytes_(DataSize) void* pData, UINT DataSize) noexcept override;
};
};

================================================
FILE: include/Residency.h
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once

namespace D3D12TranslationLayer
{
// Used to contain waits that must be satisfied before a pinned ManagedObject can be unpinned.
class PinWaits { public: class PinWait { public: PinWait(ID3D12Fence* pFence, UINT64 value) : m_spFence(pFence), m_value(value) { } PinWait(const PinWait& pinWait) = default; ~PinWait() = default; CComPtr m_spFence; UINT64 m_value; }; ~PinWaits() { ClearPinWaits(); } void ClearPinWaits() { m_pinWaits.clear(); } void AddPinWaits(UINT NumSync, _In_reads_(NumSync) UINT64* pSignalValues, _In_reads_(NumSync) ID3D12Fence** ppFences) { m_pinWaits.reserve(m_pinWaits.size() + NumSync); // throw( bad_alloc); for (UINT i(0); i < NumSync; ++i) { m_pinWaits.emplace_back(ppFences[i], pSignalValues[i]); } } bool CheckPinWaits() { m_pinWaits.erase(std::remove_if(m_pinWaits.begin(), m_pinWaits.end(), [](PinWait& pinWait) { return (pinWait.m_spFence->GetCompletedValue() >= pinWait.m_value); }), m_pinWaits.end()); return m_pinWaits.size() > 0; } private: std::vector m_pinWaits; }; // Used to track meta data for each object the app potentially wants // to make resident or evict. class ManagedObject { public: enum class RESIDENCY_STATUS { RESIDENT, EVICTED }; ManagedObject() = default; ~ManagedObject() { #if TRANSLATION_LAYER_DBG for (bool val : CommandListsUsedOn) { assert(!val); } #endif } void Initialize(ID3D12Pageable* pUnderlyingIn, UINT64 ObjectSize) { assert(pUnderlying == nullptr); pUnderlying = pUnderlyingIn; Size = ObjectSize; } inline bool IsInitialized() { return pUnderlying != nullptr; } bool IsPinned() { return PinCount > 0 || m_pinWaits.CheckPinWaits(); } void Pin() { ++PinCount; } void AddPinWaits(UINT NumSync, _In_reads_(NumSync) UINT64* pSignalValues, _In_reads_(NumSync) ID3D12Fence** ppFences) { return m_pinWaits.AddPinWaits(NumSync, pSignalValues, ppFences); } void UnPin() { assert(PinCount > 0); --PinCount; } // Wether the object is resident or not RESIDENCY_STATUS ResidencyStatus = RESIDENCY_STATUS::RESIDENT; // The underlying D3D Object being tracked ID3D12Pageable* pUnderlying = nullptr; // The size of the D3D Object in bytes UINT64 Size = 0; UINT64 
LastUsedFenceValues[(UINT)COMMAND_LIST_TYPE::MAX_VALID] = {}; UINT64 LastUsedPeriodicTrimNotificationIndex = 0; UINT64 LastUsedTimestamp = 0; // This is used to track which open command lists this resource is currently used on. // + 1 for transient residency sets. bool CommandListsUsedOn[(UINT)COMMAND_LIST_TYPE::MAX_VALID + 1] = {}; // Linked list entry LIST_ENTRY ListEntry; // Pinning an object prevents eviction. Callers must seperately make resident as usual. UINT32 PinCount = 0; PinWaits m_pinWaits; }; // This represents a set of objects which are referenced by a command list i.e. every time a resource // is bound for rendering, clearing, copy etc. the set must be updated to ensure the it is resident // for execution. class ResidencySet { friend class ResidencyManager; public: static const UINT32 InvalidIndex = (UINT32)-1; ResidencySet() = default; ~ResidencySet() = default; // Returns true if the object was inserted, false otherwise inline bool Insert(ManagedObject* pObject) { assert(CommandListIndex != InvalidIndex); // If we haven't seen this object on this command list mark it if (pObject->CommandListsUsedOn[CommandListIndex] == false) { pObject->CommandListsUsedOn[CommandListIndex] = true; Set.push_back(pObject); return true; } else { return false; } } void Open(UINT commandListType) { assert(CommandListIndex == InvalidIndex); CommandListIndex = commandListType; Set.clear(); } void Close() { for (auto pObject : Set) { pObject->CommandListsUsedOn[CommandListIndex] = false; } CommandListIndex = InvalidIndex; } private: UINT32 CommandListIndex = InvalidIndex; std::vector Set; }; namespace Internal { struct Fence { HRESULT Initialize(ID3D12Device* pDevice) { HRESULT hr = pDevice->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&pFence)); assert(SUCCEEDED(hr)); return hr; } HRESULT GPUWait(ID3D12CommandQueue* pQueue, UINT CommandListIndex) { if (CommandListIndex < (UINT)COMMAND_LIST_TYPE::MAX_VALID) { // Don't call Wait again if we've already inserted a wait 
on this queue assert(LastWaitedValues[CommandListIndex] <= FenceValue); if (LastWaitedValues[CommandListIndex] == FenceValue) { return S_OK; } LastWaitedValues[CommandListIndex] = FenceValue; } HRESULT hr = pQueue->Wait(pFence, FenceValue); assert(SUCCEEDED(hr)); return hr; } inline void Increment() { FenceValue++; } CComPtr pFence; UINT64 FenceValue = 0; UINT64 LastWaitedValues[(UINT)COMMAND_LIST_TYPE::MAX_VALID] = {}; }; // A Least Recently Used Cache. Tracks all of the objects requested by the app so that objects // that aren't used freqently can get evicted to help the app stay under buget. class LRUCache { public: LRUCache() : NumResidentObjects(0), NumEvictedObjects(0), ResidentSize(0) { InitializeListHead(&ResidentObjectListHead); }; void Insert(ManagedObject* pObject) { if (pObject->ResidencyStatus == ManagedObject::RESIDENCY_STATUS::RESIDENT) { InsertHeadList(&ResidentObjectListHead, &pObject->ListEntry); NumResidentObjects++; ResidentSize += pObject->Size; } else { NumEvictedObjects++; } } void Remove(ManagedObject* pObject) { if (pObject->ResidencyStatus == ManagedObject::RESIDENCY_STATUS::RESIDENT) { RemoveEntryList(&pObject->ListEntry); NumResidentObjects--; ResidentSize -= pObject->Size; } else { NumEvictedObjects--; } } // When an object is used by the GPU we move it to the end of the list. 
// This way things closer to the head of the list are the objects which // are stale and better candidates for eviction void ObjectReferenced(ManagedObject* pObject) { assert(pObject->ResidencyStatus == ManagedObject::RESIDENCY_STATUS::RESIDENT); RemoveEntryList(&pObject->ListEntry); InsertTailList(&ResidentObjectListHead, &pObject->ListEntry); } void MakeResident(ManagedObject* pObject) { assert(pObject->ResidencyStatus == ManagedObject::RESIDENCY_STATUS::EVICTED); pObject->ResidencyStatus = ManagedObject::RESIDENCY_STATUS::RESIDENT; InsertTailList(&ResidentObjectListHead, &pObject->ListEntry); NumEvictedObjects--; NumResidentObjects++; ResidentSize += pObject->Size; } void Evict(ManagedObject* pObject) { assert(pObject->ResidencyStatus == ManagedObject::RESIDENCY_STATUS::RESIDENT); assert(!pObject->IsPinned()); pObject->ResidencyStatus = ManagedObject::RESIDENCY_STATUS::EVICTED; RemoveEntryList(&pObject->ListEntry); NumResidentObjects--; ResidentSize -= pObject->Size; NumEvictedObjects++; } // Evict all of the resident objects used in sync points up to the specficied one (inclusive) void TrimToSyncPointInclusive(INT64 CurrentUsage, INT64 CurrentBudget, std::vector &EvictionList, UINT64 FenceValues[]); // Trim all objects which are older than the specified time void TrimAgedAllocations(UINT64 FenceValues[], std::vector &EvictionList, UINT64 CurrentTimeStamp, UINT64 MinDelta); // Trim all objects which haven't been used in the last periodic trim callback period void TrimUnusedAllocationsSinceLastNotificationPeriod(UINT64 CurrentPeriodicTrimNotificationIndex, UINT64 FenceValues[], std::vector& EvictionList, UINT64& BytesToEvict); ManagedObject* GetResidentListHead() { if (IsListEmpty(&ResidentObjectListHead)) { return nullptr; } return CONTAINING_RECORD(ResidentObjectListHead.Flink, ManagedObject, ListEntry); } LIST_ENTRY ResidentObjectListHead; UINT32 NumResidentObjects; UINT32 NumEvictedObjects; UINT64 ResidentSize; }; } class ResidencyManager { public: 
ResidencyManager(ImmediateContext& ImmCtx) : ImmCtx(ImmCtx) { } ~ResidencyManager(); // NOTE: DeviceNodeIndex is an index not a mask. The majority of D3D12 uses bit masks to identify a GPU node whereas DXGI uses 0 based indices. HRESULT Initialize(UINT DeviceNodeIndex, IDXCoreAdapter* ParentAdapterDXCore, IDXGIAdapter3* ParentAdapterDXGI); // Incremented each trim notification callback invocation // Start at 1 so that resources used in the first period are not immediately evicted const UINT64 PeriodicTrimNotificationIndexInitialValue = 1; UINT64 PeriodicTrimNotificationIndex = PeriodicTrimNotificationIndexInitialValue; // Cookie returned at trim notification callback registration. UINT32_MAX indicates no callback registered. const DWORD c_PeriodicTrimCallbackCookie_Unregistered = UINT32_MAX; DWORD PeriodicTrimCallbackCookie = c_PeriodicTrimCallbackCookie_Unregistered; static void APIENTRY PeriodicTrimNotificationCallback(const D3D12_TRIM_NOTIFICATION* pData); void BeginTrackingObject(ManagedObject* pObject) { std::lock_guard Lock(Mutex); if (pObject) { assert(pObject->pUnderlying != nullptr); LRU.Insert(pObject); } } void EndTrackingObject(ManagedObject* pObject) { std::lock_guard Lock(Mutex); LRU.Remove(pObject); } // One residency set per command-list HRESULT ExecuteCommandList(ID3D12CommandQueue* Queue, UINT CommandListIndex, ID3D12CommandList* CommandList, ResidencySet* pMasterSet) { return ExecuteMasterSet(Queue, CommandListIndex, pMasterSet, [Queue, CommandList]() { Queue->ExecuteCommandLists(1, &CommandList); }); } template HRESULT SubmitCommandQueueCommand(ID3D12CommandQueue* Queue, UINT CommandListIndex, ResidencySet* pMasterSet, TFunc&& func) { return ExecuteMasterSet(Queue, CommandListIndex, pMasterSet, func); } HRESULT PreExecuteCommandQueueCommand(ID3D12CommandQueue* Queue, UINT CommandListIndex, ResidencySet* pMasterSet) { return PrepareToExecuteMasterSet(Queue, CommandListIndex, pMasterSet); } private: HRESULT 
PrepareToExecuteMasterSet(ID3D12CommandQueue* Queue, UINT CommandListIndex, ResidencySet* pMasterSet) { // Evict or make resident all of the objects we identified above. HRESULT hr = ProcessPagingWork(CommandListIndex, pMasterSet); // If there are some things that need to be made resident we need to make sure that the GPU // doesn't execute until the async thread signals that the MakeResident call has returned. if (SUCCEEDED(hr)) { hr = AsyncThreadFence.GPUWait(Queue, CommandListIndex); } return hr; } template HRESULT ExecuteMasterSet(ID3D12CommandQueue* Queue, UINT CommandListIndex, ResidencySet* pMasterSet, TFunc&& func) { HRESULT hr = S_OK; hr = PrepareToExecuteMasterSet(Queue, CommandListIndex, pMasterSet); if (SUCCEEDED(hr)) { func(); } return hr; } HRESULT ProcessPagingWork(UINT CommandListIndex, ResidencySet *pMasterSet); void GetCurrentBudget(UINT64 Timestamp, DXCoreAdapterMemoryBudget* InfoOut); void WaitForSyncPoint(UINT64 FenceValues[]); // Generate a result between the minimum period and the maximum period based on the current // local memory pressure. I.e. when memory pressure is low, objects will persist longer before // being evicted. 
UINT64 GetCurrentEvictionGracePeriod(DXCoreAdapterMemoryBudget* LocalMemoryState) { // 1 == full pressure, 0 == no pressure double Pressure = (double(LocalMemoryState->currentUsage) / double(LocalMemoryState->budget)); Pressure = min(Pressure, 1.0); if (Pressure > cTrimPercentageMemoryUsageThreshold) { // Normalize the pressure for the range 0 to cTrimPercentageMemoryUsageThreshold Pressure = (Pressure - cTrimPercentageMemoryUsageThreshold) / (1.0 - cTrimPercentageMemoryUsageThreshold); // Linearly interpolate between the min period and the max period based on the pressure return UINT64((MaxEvictionGracePeriodTicks - MinEvictionGracePeriodTicks) * (1.0 - Pressure)) + MinEvictionGracePeriodTicks; } else { // Essentially don't trim at all return MAXUINT64; } } ImmediateContext& ImmCtx; Internal::Fence AsyncThreadFence; CComPtr Device; CComPtr Device15; // NOTE: This is an index not a mask. The majority of D3D12 uses bit masks to identify a GPU node whereas DXGI uses 0 based indices. UINT NodeIndex = 0; IDXCoreAdapter* AdapterDXCore = nullptr; IDXGIAdapter3* AdapterDXGI = nullptr; Internal::LRUCache LRU; std::recursive_mutex Mutex; static constexpr float cMinEvictionGracePeriod = 1.0f; UINT64 MinEvictionGracePeriodTicks; static constexpr float cMaxEvictionGracePeriod = 60.0f; UINT64 MaxEvictionGracePeriodTicks; // When the app is using more than this % of its budgeted local VidMem trimming will occur // (valid between 0.0 - 1.0) static constexpr float cTrimPercentageMemoryUsageThreshold = 0.7f; DXCoreAdapterMemoryBudget CachedBudget; static constexpr float cBudgetQueryPeriod = 1.0f; UINT64 BudgetQueryPeriodTicks; UINT64 LastBudgetTimestamp = 0; // Use a union so that we only need 1 allocation union ResidentScratchSpace { ManagedObject *pManagedObject; ID3D12Pageable *pUnderlying; }; std::vector MakeResidentList; std::vector EvictionList; }; }; ================================================ FILE: include/Resource.hpp ================================================ 
// Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once namespace D3D12TranslationLayer { // The same as D3D11 enum RESOURCE_USAGE { RESOURCE_USAGE_DEFAULT = 0, RESOURCE_USAGE_IMMUTABLE = 1, RESOURCE_USAGE_DYNAMIC = 2, RESOURCE_USAGE_STAGING = 3, }; enum RESOURCE_CPU_ACCESS { RESOURCE_CPU_ACCESS_NONE = 0x00000L, RESOURCE_CPU_ACCESS_WRITE = 0x10000L, RESOURCE_CPU_ACCESS_READ = 0x20000L, }; DEFINE_ENUM_FLAG_OPERATORS( RESOURCE_CPU_ACCESS ); enum RESOURCE_BIND_FLAGS { RESOURCE_BIND_NONE = 0x0L, RESOURCE_BIND_VERTEX_BUFFER = 0x1L, RESOURCE_BIND_INDEX_BUFFER = 0x2L, RESOURCE_BIND_CONSTANT_BUFFER = 0x4L, RESOURCE_BIND_SHADER_RESOURCE = 0x8L, RESOURCE_BIND_STREAM_OUTPUT = 0x10L, RESOURCE_BIND_RENDER_TARGET = 0x20L, RESOURCE_BIND_DEPTH_STENCIL = 0x40L, RESOURCE_BIND_UNORDERED_ACCESS = 0x80L, RESOURCE_BIND_GPU_INPUT = 0x20fL, RESOURCE_BIND_GPU_OUTPUT = 0xf0L, RESOURCE_BIND_CAPTURE = 0x800L, RESOURCE_BIND_DECODER = 0x200L, RESOURCE_BIND_VIDEO_ENCODER = 0x400L, }; DEFINE_ENUM_FLAG_OPERATORS( RESOURCE_BIND_FLAGS ); enum MAP_TYPE { MAP_TYPE_READ = 1, MAP_TYPE_WRITE = 2, MAP_TYPE_READWRITE = 3, MAP_TYPE_WRITE_DISCARD = 4, MAP_TYPE_WRITE_NOOVERWRITE = 5, }; enum class DeferredDestructionType { Submission, Completion }; struct MappedSubresource { void * pData; UINT RowPitch; UINT DepthPitch; }; // The parameters of the resource as the app sees it, the translation layer // can alter the values under the covers hence the distinction. 
struct AppResourceDesc { AppResourceDesc() { memset(this, 0, sizeof(*this)); } AppResourceDesc(UINT SubresourcesPerPlane, UINT8 NonOpaquePlaneCount, UINT Subresources, UINT8 MipLevels, UINT16 ArraySize, UINT Depth, UINT Width, UINT Height, DXGI_FORMAT Format, UINT Samples, UINT Quality, RESOURCE_USAGE usage, RESOURCE_CPU_ACCESS cpuAcess, RESOURCE_BIND_FLAGS bindFlags, D3D12_RESOURCE_DIMENSION dimension) : m_SubresourcesPerPlane(SubresourcesPerPlane) ,m_NonOpaquePlaneCount(NonOpaquePlaneCount) ,m_Subresources(Subresources) ,m_MipLevels(MipLevels) ,m_ArraySize(ArraySize) ,m_Depth(Depth) ,m_Width(Width) ,m_Height(Height) ,m_Format(Format) ,m_Samples(Samples) ,m_Quality(Quality) ,m_usage(usage) ,m_cpuAcess(cpuAcess) ,m_bindFlags(bindFlags) ,m_resourceDimension(dimension) {} AppResourceDesc(const D3D12_RESOURCE_DESC &desc12, D3D12TranslationLayer::RESOURCE_USAGE Usage, DWORD Access, DWORD BindFlags); UINT SubresourcesPerPlane() const { return m_SubresourcesPerPlane; } UINT8 NonOpaquePlaneCount() const { return m_NonOpaquePlaneCount; } UINT Subresources() const { return m_Subresources; } UINT8 MipLevels() const { return m_MipLevels; } UINT16 ArraySize() const { return m_ArraySize; } UINT Depth() const { return m_Depth; } UINT Width() const { return m_Width; } UINT Height() const { return m_Height; } DXGI_FORMAT Format() const { return m_Format; } UINT Samples() const { return m_Samples; } UINT Quality() const { return m_Quality; } RESOURCE_CPU_ACCESS CPUAccessFlags() const { return m_cpuAcess; } RESOURCE_USAGE Usage() const { return m_usage; } RESOURCE_BIND_FLAGS BindFlags() const { return m_bindFlags; } D3D12_RESOURCE_DIMENSION ResourceDimension() const { return m_resourceDimension; } UINT m_SubresourcesPerPlane; UINT8 m_NonOpaquePlaneCount; UINT m_Subresources; UINT8 m_MipLevels; UINT16 m_ArraySize; UINT m_Depth; UINT m_Width; UINT m_Height; DXGI_FORMAT m_Format; UINT m_Samples; UINT m_Quality; RESOURCE_USAGE m_usage; RESOURCE_CPU_ACCESS m_cpuAcess; RESOURCE_BIND_FLAGS 
m_bindFlags; D3D12_RESOURCE_DIMENSION m_resourceDimension; }; enum class FormatEmulation { None = 0, YV12 }; //TODO: remove this once runtime dependecy has been removed struct ResourceCreationArgs { D3D12_RESOURCE_DIMENSION ResourceDimension12() const { return m_desc12.Dimension; } D3D12_TEXTURE_LAYOUT ApiTextureLayout12() const { return m_desc12.Layout; } D3D11_RESOURCE_DIMENSION ResourceDimension11() const { return static_cast(ResourceDimension12()); } UINT ArraySize() const { return (ResourceDimension12() == D3D12_RESOURCE_DIMENSION_TEXTURE3D) ? 1 : m_desc12.DepthOrArraySize; } bool IsShared() const { return (m_flags11.MiscFlags & (D3D11_RESOURCE_MISC_SHARED | D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX)) != 0; } bool IsNTHandleShared() const { return (m_flags11.MiscFlags & D3D11_RESOURCE_MISC_SHARED_NTHANDLE) != 0; } bool IsGDIStyleHandleShared() const { assert(!(IsNTHandleShared() && !IsShared())); // Can't be NT handle shared and not regular shared return IsShared() && !IsNTHandleShared(); } D3D12_RESOURCE_DESC m_desc12; AppResourceDesc m_appDesc; D3D12_HEAP_DESC m_heapDesc; D3D11_RESOURCE_FLAGS m_flags11; bool m_isPlacedTexture; bool m_bBoundForStreamOut; bool m_bManageResidency; bool m_bTriggerDeferredWaits; bool m_bIsD3D9on12Resource; FormatEmulation m_FormatEmulation = FormatEmulation::None; // Setting this function overrides the normal creation method used by the translation layer. // It can be used for smuggling a resource through the create path or using alternate creation APIs. 
std::function m_PrivateCreateFn; //11on12 Only UINT m_OffsetToStreamOutputSuffix; AllocatorHeapType m_heapType = AllocatorHeapType::None; }; // Handles when to start and end tracking for D3DX12ResidencyManager::ManagedObject struct ResidencyManagedObjectWrapper { ResidencyManagedObjectWrapper(ResidencyManager &residencyManager) : m_residencyManager(residencyManager) {} // ManagedObject uses a type of linked lists that breaks when copying it around, so disable the copy operator ResidencyManagedObjectWrapper(const ResidencyManagedObjectWrapper&) = delete; void Initialize(ID3D12Pageable *pResource, UINT64 resourceSize, bool isResident = true) { m_residencyHandle.Initialize(pResource, resourceSize); if (!isResident) { m_residencyHandle.ResidencyStatus = ManagedObject::RESIDENCY_STATUS::EVICTED; } m_residencyManager.BeginTrackingObject(&m_residencyHandle); } ~ResidencyManagedObjectWrapper() { m_residencyManager.EndTrackingObject(&m_residencyHandle); } ManagedObject &GetManagedObject() { return m_residencyHandle; } private: ManagedObject m_residencyHandle; ResidencyManager &m_residencyManager; }; // Wraps the BufferSuballocation with functions that help automatically account for the // suballocated offset class D3D12ResourceSuballocation { public: D3D12ResourceSuballocation() { Reset(); } D3D12ResourceSuballocation(ID3D12Resource *pResource, const HeapSuballocationBlock &allocation) : m_pResource(pResource), m_bufferSubAllocation(allocation) {} bool IsInitialized() { return GetResource() != nullptr; } void Reset() { m_pResource = nullptr; } ID3D12Resource *GetResource() const { return m_pResource; } UINT64 GetOffset() const { UINT offset = 0; if (m_bufferSubAllocation.IsDirectAllocation()) { assert(m_bufferSubAllocation.GetOffset() == 0); } else { // The disjoint buddy allocator works as if all the resources were // one contiguous block of memory and the offsets reflect this. 
// Convert the offset to be local to the selected resource offset = m_bufferSubAllocation.GetOffset() % cBuddyAllocatorThreshold; } return offset; } HRESULT Map( UINT Subresource, _In_opt_ const D3D12_RANGE *pReadRange, _Outptr_opt_result_bytebuffer_(_Inexpressible_("Dependent on resource")) void **ppData) { D3D12_RANGE *pOffsetReadRange = nullptr; D3D12_RANGE offsetReadRange; if (pReadRange) { offsetReadRange = OffsetRange(*pReadRange); pOffsetReadRange = &offsetReadRange; } HRESULT hr = GetResource()->Map(Subresource, pOffsetReadRange, ppData); if (*ppData) { *ppData = (void*)((BYTE *)(*ppData) + GetOffset()); } return hr; } void Unmap( UINT Subresource, _In_opt_ const D3D12_RANGE *pWrittenRange) { D3D12_RANGE *pOffsetWrittenRange = nullptr; D3D12_RANGE offsetWrittenRange; if (pWrittenRange) { offsetWrittenRange = OffsetRange(*pWrittenRange); pOffsetWrittenRange = &offsetWrittenRange; } GetResource()->Unmap(Subresource, pOffsetWrittenRange); } D3D12_TEXTURE_COPY_LOCATION GetCopyLocation(const D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint) const { CD3DX12_TEXTURE_COPY_LOCATION copyLocation(GetResource(), footprint); copyLocation.PlacedFootprint.Offset += GetOffset(); return copyLocation; } HeapSuballocationBlock const& GetBufferSuballocation() const { return m_bufferSubAllocation; } HeapSuballocationBlock &GetBufferSuballocation() { return m_bufferSubAllocation; } inline D3D12_RANGE OffsetRange(const D3D12_RANGE &originalRange) const { // If the range is empty, just leave it as-is if (originalRange.Begin == 0 && originalRange.End == 0) { return originalRange; } D3D12_RANGE offsetRange; offsetRange.Begin = originalRange.Begin + (SIZE_T)GetOffset(); offsetRange.End = originalRange.End + (SIZE_T)GetOffset(); return offsetRange; } private: ID3D12Resource *m_pResource; HeapSuballocationBlock m_bufferSubAllocation; }; struct EncodedResourceSuballocation { private: UINT64 Offset; UINT64 Size; SIZE_T Ptr; static constexpr UINT c_DirectAllocationMask = 1u; static UINT 
GetDirectAllocationMask(HeapSuballocationBlock const& block) { return block.IsDirectAllocation() ? c_DirectAllocationMask : 0u; } public: EncodedResourceSuballocation() = default; EncodedResourceSuballocation(HeapSuballocationBlock const& block, ID3D12Resource* pPtr) : Offset(block.GetOffset()) , Size(block.GetSize()) , Ptr(reinterpret_cast(pPtr) | GetDirectAllocationMask(block)) { } EncodedResourceSuballocation(D3D12ResourceSuballocation const& suballoc) : EncodedResourceSuballocation(suballoc.GetBufferSuballocation(), suballoc.GetResource()) { } bool IsDirectAllocation() const { return (Ptr & c_DirectAllocationMask) != 0; } ID3D12Resource* GetResource() const { return reinterpret_cast(Ptr & ~SIZE_T(1)); } ID3D12Resource* GetDirectAllocation() const { return IsDirectAllocation() ? GetResource() : nullptr; } HeapSuballocationBlock DecodeSuballocation() const { return HeapSuballocationBlock(Offset, Size, GetDirectAllocation()); } D3D12ResourceSuballocation Decode() const { return D3D12ResourceSuballocation(GetResource(), DecodeSuballocation()); } }; struct OutstandingResourceUse { OutstandingResourceUse(COMMAND_LIST_TYPE type, UINT64 value) { commandListType = type; fenceValue = value; } COMMAND_LIST_TYPE commandListType; UINT64 fenceValue; bool operator==(OutstandingResourceUse const& rhs) const { return commandListType == rhs.commandListType && fenceValue == rhs.fenceValue; } }; //================================================================================================================================== // Resource // Stores data responsible for remapping D3D11 resources to underlying D3D12 resources and heaps //================================================================================================================================== class Resource : public DeviceChild, public TransitionableResourceBase { public: // Methods friend class ImmediateContext; friend class BatchedResource; private: Resource(ImmediateContext* pDevice, ResourceCreationArgs& 
createArgs, void*& pPreallocatedMemory) noexcept(false); ~Resource() noexcept; void Create(ResourceAllocationContext threadingContext) noexcept(false); static size_t CalcPreallocationSize(ResourceCreationArgs const& createArgs); unique_comptr static AllocateResource(ImmediateContext* pDevice, ResourceCreationArgs& createArgs) noexcept(false); volatile UINT m_RefCount = 0; public: static TRANSLATION_API unique_comptr CreateResource(ImmediateContext* pDevice, ResourceCreationArgs& createArgs, ResourceAllocationContext threadingContext) noexcept(false); static TRANSLATION_API unique_comptr OpenResource(ImmediateContext* pDevice, ResourceCreationArgs& createArgs, _In_ IUnknown *pResource, DeferredDestructionType deferredDestructionType, _In_ D3D12_RESOURCE_STATES currentState) noexcept(false); inline void AddRef() { InterlockedIncrement(&m_RefCount); } inline void Release() { if (InterlockedDecrement(&m_RefCount) == 0) { delete this; } } void UsedInCommandList(COMMAND_LIST_TYPE commandListType, UINT64 id); ResourceCreationArgs* Parent() { return &m_creationArgs; } AppResourceDesc* AppDesc() { return &m_creationArgs.m_appDesc; } ID3D12Resource* GetUnderlyingResource() noexcept { return m_Identity->GetResource(); } void UnderlyingResourceChanged() noexcept(false); void ZeroConstantBufferPadding() noexcept; UINT NumSubresources() noexcept { return AppDesc()->Subresources() * m_SubresourceMultiplier; } UINT8 SubresourceMultiplier() noexcept { return m_SubresourceMultiplier; } UINT GetExtendedSubresourceIndex(UINT Index, UINT Plane) noexcept { assert(AppDesc()->NonOpaquePlaneCount() == 1 || Plane == 0); return ConvertSubresourceIndexAddPlane(Index, AppDesc()->SubresourcesPerPlane(), Plane); } CSubresourceSubset GetFullSubresourceSubset() { return CSubresourceSubset(AppDesc()->MipLevels(), AppDesc()->ArraySize(), AppDesc()->NonOpaquePlaneCount() * m_SubresourceMultiplier); } void DecomposeSubresource(_In_ UINT Subresource, _Out_ UINT &mipSlice, _Out_ UINT &arraySlice, _Out_ 
UINT &planeSlice) { D3D12DecomposeSubresource(Subresource, Parent()->m_desc12.MipLevels, Parent()->ArraySize(), mipSlice, arraySlice, planeSlice); } UINT GetSubresourceIndex(UINT PlaneIndex, UINT MipLevel, UINT ArraySlice) { return D3D12CalcSubresource(MipLevel, ArraySlice, PlaneIndex, Parent()->m_desc12.MipLevels, Parent()->ArraySize()); } D3D12_PLACED_SUBRESOURCE_FOOTPRINT& GetSubresourcePlacement(UINT subresource) noexcept; D3D12_RANGE GetSubresourceRange(UINT subresource, _In_opt_ const D3D12_BOX *pSelectedBox = nullptr) noexcept; UINT64 GetResourceSize() noexcept; static D3D12_HEAP_TYPE GetD3D12HeapType(RESOURCE_USAGE usage, UINT cpuAccessFlags) noexcept; static void FillSubresourceDesc(ID3D12Device* pDevice, bool supportsUnrestrictedBufferTextureCopyPitch, DXGI_FORMAT, UINT Width, UINT Height, UINT Depth, _Out_ D3D12_PLACED_SUBRESOURCE_FOOTPRINT& Placement) noexcept; UINT DepthPitch(UINT Subresource) noexcept; template UINT GetUniqueness() const noexcept { return m_AllUniqueness; } template<> UINT GetUniqueness() const noexcept { return m_SRVUniqueness; } template void ViewBound(View* pView, EShaderStage stage, UINT slot) { m_currentBindings.ViewBound(pView, stage, slot); } template void ViewUnbound(View* pView, EShaderStage stage, UINT slot) { m_currentBindings.ViewUnbound(pView, stage, slot); } inline UINT GetOffsetToStreamOutputSuffix() { return m_OffsetToStreamOutputSuffix; } RESOURCE_USAGE GetEffectiveUsage() const { return m_effectiveUsage; } inline bool IsBloatedConstantBuffer() { return (AppDesc()->BindFlags() & RESOURCE_BIND_FLAGS::RESOURCE_BIND_CONSTANT_BUFFER) && AppDesc()->Width() % D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT != 0; } inline bool IsDefaultResourceBloated() { return m_OffsetToStreamOutputSuffix != 0 || IsBloatedConstantBuffer(); } inline bool TriggersDeferredWaits() const { return m_creationArgs.m_bTriggerDeferredWaits; } inline FormatEmulation GetFormatEmulation() const { return m_creationArgs.m_FormatEmulation; } inline bool 
IsInplaceFormatEmulation() const { return GetFormatEmulation() == FormatEmulation::YV12; } // Format emulation modifies the resource in place for map/unmap bool WaitForOutstandingResourcesIfNeeded(bool DoNotWait); void AddHeapToTilePool(unique_comptr spHeap) { auto HeapDesc = spHeap->GetDesc(); m_TilePool.m_Allocations.emplace_back(static_cast(HeapDesc.SizeInBytes), 0); // throw( bad_alloc ) auto& Allocation = m_TilePool.m_Allocations.front(); Allocation.m_spUnderlyingBufferHeap = std::move(spHeap); } inline void SetMinLOD(float MinLOD) { m_MinLOD = MinLOD; } inline float GetMinLOD() { return m_MinLOD; } inline void SetWaitForCompletionRequired(bool value) { m_bWaitForCompletionRequired = value; } void ClearInputBindings(); void ClearOutputBindings(); UINT GetCommandListTypeMaskFromUsed() { UINT typeMask = 0; for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++) { if (m_LastUsedCommandListID[i] != 0) { typeMask |= (1 << i); } } return typeMask; } UINT GetCommandListTypeMask() { UINT typeMask = m_Identity->m_currentState.GetCommandListTypeMask(); if (typeMask == COMMAND_LIST_TYPE_UNKNOWN_MASK) { typeMask = GetCommandListTypeMaskFromUsed(); } return typeMask; } UINT GetCommandListTypeMask(const CViewSubresourceSubset &viewSubresources) { UINT typeMask = m_Identity->m_currentState.GetCommandListTypeMask(viewSubresources); if (typeMask == COMMAND_LIST_TYPE_UNKNOWN_MASK) { typeMask = GetCommandListTypeMaskFromUsed(); } return typeMask; } UINT GetCommandListTypeMask(UINT Subresource) { UINT typeMask = m_Identity->m_currentState.GetCommandListTypeMask(Subresource); if (typeMask == COMMAND_LIST_TYPE_UNKNOWN_MASK) { typeMask = GetCommandListTypeMaskFromUsed(); } return typeMask; } // Used for when we are reusing a generic buffer that's used as an intermediate copy resource. 
Because // we're constantly copying to/from different resources with different footprints, we need to make sure we // update the app desc so that copies will use the right footprint void UpdateAppDesc(const AppResourceDesc &AppDesc); void SwapIdentities(Resource& Other) { std::swap(m_Identity, Other.m_Identity); std::swap(m_SubresourcePlacement[0].Offset, Other.m_SubresourcePlacement[0].Offset); DeviceChild::SwapIdentities(Other); } void AddToResidencyManager(bool bIsResident); bool IsResident() { return !GetIdentity()->m_pResidencyHandle || GetIdentity()->m_pResidencyHandle->GetManagedObject().ResidencyStatus == ManagedObject::RESIDENCY_STATUS::RESIDENT; } static bool IsSuballocatedFromSameHeap(Resource *pResourceA, Resource *pResourceB) { return pResourceA && pResourceB && pResourceA->GetIdentity()->m_bOwnsUnderlyingResource == false && pResourceB->GetIdentity()->m_bOwnsUnderlyingResource == false && pResourceA->GetIdentity()->GetSuballocatedResource() == pResourceB->GetIdentity()->GetSuballocatedResource(); } static bool IsSameUnderlyingSubresource(Resource *pResourceA, UINT subresourceA, Resource *pResourceB, UINT subresourceB) { return pResourceA && pResourceB && ((pResourceA == pResourceB && subresourceA == subresourceB) || IsSuballocatedFromSameHeap(pResourceA, pResourceB)); } AllocatorHeapType GetAllocatorHeapType() { assert(AppDesc()->CPUAccessFlags() != 0); if (m_creationArgs.m_heapType == AllocatorHeapType::None) { if (IsDecoderCompressedBuffer()) { return AllocatorHeapType::Decoder; } else if (AppDesc()->CPUAccessFlags() & RESOURCE_CPU_ACCESS_READ) { return AllocatorHeapType::Readback; } return AllocatorHeapType::Upload; } return m_creationArgs.m_heapType; } bool UnderlyingResourceIsSuballocated() { return !m_Identity->m_bOwnsUnderlyingResource && m_spCurrentCpuHeaps.size() == 0; } bool IsLockableSharedBuffer() { return m_creationArgs.m_desc12.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && m_creationArgs.IsShared() && AppDesc()->CPUAccessFlags(); } 
bool IsDecoderCompressedBuffer() { return Parent()->ResourceDimension12() == D3D12_RESOURCE_DIMENSION_BUFFER && (AppDesc()->BindFlags() & RESOURCE_BIND_DECODER); } bool OwnsReadbackHeap() { // These are cases where we can't suballocate out of larger heaps because resource transitions can only be done on heap granularity // and these resources can be transitioned out of the default heap state (COPY_DEST). // // Note: We don't need to do this for dynamic write-only buffers because those buffers always stay in GENRIC_READ and only transition // at copies (and transition back to GENERIC read directly afterwards) if (IsDecoderCompressedBuffer()) { return false; } else { return AppDesc()->BindFlags() & RESOURCE_BIND_DECODER || m_creationArgs.m_heapDesc.Properties.CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE || (AppDesc()->Usage() == RESOURCE_USAGE_DYNAMIC && Parent()->ResourceDimension12() == D3D12_RESOURCE_DIMENSION_BUFFER && (AppDesc()->CPUAccessFlags() & RESOURCE_CPU_ACCESS_READ) != 0); } } private: void InitializeSubresourceDescs() noexcept(false); void InitializeTilingData() noexcept; void CreateUnderlying(ResourceAllocationContext threadingContext) noexcept(false); public: // Contains all information that can be rotatable for back buffers struct SResourceIdentity { SResourceIdentity(UINT NumSubresources, bool bSimultaneousAccess, void*& pPreallocatedMemory) noexcept(false) : m_currentState(NumSubresources, bSimultaneousAccess, pPreallocatedMemory) { } unique_comptr m_spUnderlyingResource; D3D12ResourceSuballocation m_suballocation; ID3D12Resource *GetOwnedResource() { assert(m_bOwnsUnderlyingResource); return m_spUnderlyingResource.get(); } ID3D12Resource *GetSuballocatedResource() { assert(!m_bOwnsUnderlyingResource); return m_suballocation.GetResource(); } UINT64 GetSuballocatedOffset() { assert(!m_bOwnsUnderlyingResource); return m_suballocation.GetOffset(); } ID3D12Resource *GetResource() { if (m_bOwnsUnderlyingResource) { return GetOwnedResource(); 
} else { return GetSuballocatedResource(); } } bool m_bOwnsUnderlyingResource = true; bool m_bSharedResource = false; bool m_bPlacedTexture = false; CCurrentResourceState m_currentState; std::unique_ptr m_pResidencyHandle; UINT64 m_LastUAVAccess = 0; bool HasRestrictedOutstandingResources() { return m_MaxOutstandingResources != 0xffffffff; } std::vector m_OutstandingResources; UINT m_MaxOutstandingResources = 0xffffffff; }; std::unique_ptr AllocateResourceIdentity(UINT NumSubresources, bool bSimultaneousAccess) { struct VoidDeleter { void operator()(void* p) { operator delete(p); } }; size_t ObjectSize = sizeof(SResourceIdentity) + CCurrentResourceState::CalcPreallocationSize(NumSubresources, bSimultaneousAccess); std::unique_ptr spMemory(operator new(ObjectSize)); void* pPreallocatedMemory = reinterpret_cast(spMemory.get()) + 1; std::unique_ptr spIdentity( new (spMemory.get()) SResourceIdentity(NumSubresources, bSimultaneousAccess, pPreallocatedMemory)); spMemory.release(); // This can fire if CalcPreallocationSize doesn't account for all preallocated arrays, or if the void*& decays to void* assert(reinterpret_cast(pPreallocatedMemory) == reinterpret_cast(spIdentity.get()) + ObjectSize); return std::move(spIdentity); } SResourceIdentity* GetIdentity() { return m_Identity.get(); } CResourceBindings& GetBindingState() { return m_currentBindings; } ManagedObject *GetResidencyHandle(); private: template void UnbindList(LIST_ENTRY list, UnbindFunction& unbindFunction) { for (LIST_ENTRY *pListEntry = list.Flink; pListEntry != &list;) { auto pViewBindings = CONTAINING_RECORD(pListEntry, CViewBindings, m_ViewBindingList); for (UINT stage = 0; stage < _countof(pViewBindings->m_BindPoints); ++stage) { auto& bindings = pViewBindings->m_BindPoints[stage]; for (UINT slot = 0; bindings.any(); ++slot) { if (bindings.test(slot)) { unbindFunction(stage, slot); } } } D3D12TranslationLayer::RemoveEntryList(&pViewBindings->m_ViewBindingList); 
D3D12TranslationLayer::InitializeListHead(&pViewBindings->m_ViewBindingList); if (D3D12TranslationLayer::IsListEmpty(pListEntry)) { break; } } } void UnBindAsRTV(); void UnBindAsDSV(); void UnBindAsSRV(); void UnBindAsCBV(); void UnBindAsVB(); void UnBindAsIB(); float m_MinLOD; ResourceCreationArgs m_creationArgs; struct STilePoolAllocation { // For tier 1, attributed heaps need to be used // For tier 2, only the buffer heap is used unique_comptr m_spUnderlyingBufferHeap; unique_comptr m_spUnderlyingTextureHeap; UINT m_Size; UINT m_TileOffset; STilePoolAllocation(UINT size, UINT offset) : m_Size(size), m_TileOffset(offset) { } STilePoolAllocation() : m_Size(0), m_TileOffset(0) { } STilePoolAllocation(STilePoolAllocation&& other) : m_spUnderlyingBufferHeap(std::move(other.m_spUnderlyingBufferHeap)) , m_spUnderlyingTextureHeap(std::move(other.m_spUnderlyingTextureHeap)) , m_Size(std::move(other.m_Size)) , m_TileOffset(std::move(other.m_TileOffset)) { } }; struct STilePoolData { std::vector m_Allocations; }; struct STiledResourceData { STiledResourceData(UINT NumSubresources, void*& pPreallocatedMemory) : m_SubresourceTiling(NumSubresources, pPreallocatedMemory) { } Resource* m_pTilePool = nullptr; PreallocatedArray m_SubresourceTiling; UINT m_NumStandardMips = 0; UINT m_NumTilesForResource = 0; UINT m_NumTilesForPackedMips = 0; }; enum class EmulatedFormatMapState { Write, ReadWrite, Read, None }; struct SEmulatedFormatSubresourceStagingAllocation { struct Deallocator { void operator()(void* pData) { AlignedHeapFree16(pData); } }; std::unique_ptr m_pInterleavedData; }; struct SEmulatedFormatSubresourceStagingData { EmulatedFormatMapState m_MapState = EmulatedFormatMapState::None; UINT m_MapRefCount = 0; }; // Note: Must be declared before all members which have arrays sized by subresource index // For texture formats with both depth and stencil (D24S8 and D32S8X24), // D3D11 treats the depth and stencil as a single interleaved subresource, // while D3D12 treats them 
as independent planes, and therefore separate subresources // This is used on both default and staging textures with these formats // to modify subresource indices used for copies, transitions, and layout tracking const UINT8 m_SubresourceMultiplier; // All resources std::unique_ptr m_Identity; CResourceBindings m_currentBindings; UINT m_SRVUniqueness; // MinLOD, renaming UINT m_AllUniqueness; // Rotate // Internally used for indexing into arrays of data for dynamic // textures. Because textures with non-opaque planes share // an upload/readback heap, all non-opaque planes of the same // mip+arrrayslice will have the same DynamicTextureIndex UINT GetDynamicTextureIndex(UINT Subresource) { UINT MipIndex, PlaneIndex, ArrayIndex; DecomposeSubresource(Subresource, MipIndex, ArrayIndex, PlaneIndex); // NonOpaquePlanes share the same upload heap PlaneIndex = 0; return GetSubresourceIndex(PlaneIndex, MipIndex, ArrayIndex); } Resource* GetCurrentCpuHeap(UINT Subresource); void SetCurrentCpuHeap(UINT subresource, Resource* UploadHeap); void SetLastCopyCommandListID(UINT subresource, UINT64 commandListID) { UINT DynamicTextureIndex = GetDynamicTextureIndex(subresource); m_LastCommandListID[DynamicTextureIndex] = commandListID; } UINT64 GetLastCopyCommandListID(UINT subresource) { UINT DynamicTextureIndex = GetDynamicTextureIndex(subresource); return m_LastCommandListID[DynamicTextureIndex]; } struct CpuHeapData { D3D12ResourceSuballocation m_subAllocation; UINT64 m_LastCopyCommandListID = 0; }; // Dynamic textures: PreallocatedArray< unique_comptr > m_spCurrentCpuHeaps; // Dynamic/staging textures: PreallocatedArray m_SubresourcePlacement; // Staging textures: PreallocatedArray m_FormatEmulationStagingAllocation; auto &GetFormatEmulationSubresourceStagingAllocation(UINT Subresource) { return m_FormatEmulationStagingAllocation[GetDynamicTextureIndex(Subresource)].m_pInterleavedData; } PreallocatedArray m_FormatEmulationStagingData; PreallocatedArray m_LastCommandListID; // 
For streamoutput buffers, 11on12 will add some bytes to the end // to hold a SStreamOutputSuffix // This contains the byte offset to that structure UINT m_OffsetToStreamOutputSuffix; // Tiled resources STilePoolData m_TilePool; STiledResourceData m_TiledResource; // The effective usage of the resource. Row-major default textures are // treated like staging textures, because D3D12 doesn't support row-major // except for cross-adapter. RESOURCE_USAGE m_effectiveUsage; // The following are used to track state of mapped dynamic textures. When // these textures are planar, each plane is mapped independently. However // the same upload buffer must be used so that they are adjacent in memory; // This is required when all planes are mapped by an application // using a single API call, even though the runtime splits it into three calls // to Map and Unmap. struct DynamicTexturePlaneData { UINT8 m_MappedPlaneRefCount[3] = {}; UINT8 m_DirtyPlaneMask = 0; bool AnyPlaneMapped() const { return (*reinterpret_cast(this) & 0xffffff) != 0; } }; PreallocatedArray m_DynamicTexturePlaneData; DynamicTexturePlaneData &GetDynamicTextureData(UINT subresource) { UINT DynamicTextureIndex = GetDynamicTextureIndex(subresource); return m_DynamicTexturePlaneData[DynamicTextureIndex]; } bool m_isValid = false; // Fence used to ensure residency operations queued as part of UnwrapUnderlyingResource // operations are completed if the caller returns a resource without scheduling any work. DeferredWait m_UnwrapUnderlyingResidencyDeferredWait; public: HRESULT AddFenceForUnwrapResidency(ID3D12CommandQueue* pQueue); }; }; ================================================ FILE: include/ResourceBinding.hpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#pragma once

namespace D3D12TranslationLayer
{
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Resource state tracking structures
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

//----------------------------------------------------------------------------------------------------------------------------------
// Tracks state due to bindings at a subresource granularity using bind refs
class CSubresourceBindings
{
public:
    // Pointer-to-member type matching the parameterless *Bound / *Unbound methods below.
    typedef void (CSubresourceBindings::*BindFunc)();

    CSubresourceBindings() { }

    // Queries: a binding kind is considered active while its counter is non-zero (or its flag is set).
    bool IsBoundAsPixelShaderResource() const { return m_PSSRVBindRefs != 0; }
    bool IsBoundAsNonPixelShaderResource() const { return m_NonPSSRVBindRefs != 0; }
    bool IsBoundAsUnorderedAccess() const { return m_UAVBindRefs != 0; }
    bool IsBoundAsRenderTarget() const { return m_RTVBindRefs != 0; }
    bool IsBoundAsWritableDepth() const { return m_bIsDepthOrStencilBoundForWrite; }
    bool IsBoundAsReadOnlyDepth() const { return m_bIsDepthOrStencilBoundForReadOnly; }

    // Bind notifications: counters are incremented, depth/stencil flags are set.
    void PixelShaderResourceViewBound() { ++m_PSSRVBindRefs; }
    void NonPixelShaderResourceViewBound() { ++m_NonPSSRVBindRefs; }
    void UnorderedAccessViewBound() { ++m_UAVBindRefs; }
    void RenderTargetViewBound() { ++m_RTVBindRefs; }
    void ReadOnlyDepthStencilViewBound() { m_bIsDepthOrStencilBoundForReadOnly = true; }
    void WritableDepthStencilViewBound() { m_bIsDepthOrStencilBoundForWrite = true; }

    // Unbind notifications: counters are decremented with no underflow check here.
    void PixelShaderResourceViewUnbound() { --m_PSSRVBindRefs; }
    void NonPixelShaderResourceViewUnbound() { --m_NonPSSRVBindRefs; }
    void UnorderedAccessViewUnbound() { --m_UAVBindRefs; }
    void RenderTargetViewUnbound() { --m_RTVBindRefs; }
    // Clears both the read-only and the writable depth/stencil flags.
    void DepthStencilViewUnbound()
    {
        m_bIsDepthOrStencilBoundForReadOnly = false;
        m_bIsDepthOrStencilBoundForWrite = false;
    }

private:
    // All counters and flags are packed into one UINT (29 of 32 bits used);
    // BindRefsUint aliases the whole set and zero-initializes it.
    union
    {
        UINT BindRefsUint = 0;
        struct
        {
            UINT m_PSSRVBindRefs : 8;
            UINT m_NonPSSRVBindRefs : 8;
            UINT m_UAVBindRefs : 7;
            UINT m_RTVBindRefs : 4;
            UINT m_bIsDepthOrStencilBoundForReadOnly : 1;
            UINT m_bIsDepthOrStencilBoundForWrite : 1;
        };
    };
};

//----------------------------------------------------------------------------------------------------------------------------------
// Tracks currently bound views, and buffer bind points
template class View;
class CResourceBindings
{
public:
    CResourceBindings(UINT SubresourceCount, UINT BindFlags, void*& pPreallocatedArray) noexcept;
    ~CResourceBindings();

    // View-based bindings are active while the corresponding view list is non-empty.
    bool IsBoundAsShaderResource() const { return !D3D12TranslationLayer::IsListEmpty(&m_ShaderResourceViewList); }
    bool IsBoundAsRenderTarget() const { return !D3D12TranslationLayer::IsListEmpty(&m_RenderTargetViewList); }
    bool IsBoundAsUnorderedAccess() const { return !D3D12TranslationLayer::IsListEmpty(&m_UnorderedAccessViewList); }
    bool IsBoundAsDepthStencil() const { return m_bIsDepthStencilViewBound; }
    bool IsBoundAsConstantBuffer() const { return m_ConstantBufferBindRefs > 0; }
    bool IsBoundAsIndexBuffer() const { return m_bIsIndexBufferBound; }
    bool IsBoundAsVertexBuffer() const { return m_VertexBufferBindings != 0; }
    bool IsBoundAsStreamOut() const { return m_StreamOutBindings != 0; }

    void ViewBoundCommon(CViewSubresourceSubset& viewSubresources, CSubresourceBindings::BindFunc pfnBound);
    void ViewUnboundCommon(CViewSubresourceSubset& viewSubresources, CSubresourceBindings::BindFunc pfnUnbound);

    template static CSubresourceBindings::BindFunc GetBindFunc(EShaderStage stage);
    template static CSubresourceBindings::BindFunc GetUnbindFunc(EShaderStage stage);
    template LIST_ENTRY& GetViewList();

    template void ViewBound(View* pView, EShaderStage stage, UINT slot);
    template void ViewUnbound(View* pView, EShaderStage stage, UINT slot);

    void VertexBufferBound(UINT slot);
    void StreamOutputBufferBound(UINT slot);
    void ConstantBufferBound(EShaderStage stage, UINT slot);
    void IndexBufferBound();

    void VertexBufferUnbound(UINT slot);
    void StreamOutputBufferUnbound(UINT slot);
    void ConstantBufferUnbound(EShaderStage stage, UINT slot);
    void IndexBufferUnbound();

    // True when no bound view is counted in m_NumViewsReferencingSubresources.
    bool AreAllSubresourcesTheSame() const { return m_NumViewsReferencingSubresources == 0; }

    D3D12_RESOURCE_STATES GetD3D12ResourceUsageFromBindings(UINT subresource) const;
    COMMAND_LIST_TYPE GetCommandListTypeFromBindings() const;

    LIST_ENTRY& GetRenderTargetList() { return m_RenderTargetViewList; }
    LIST_ENTRY& GetShaderResourceList() { return m_ShaderResourceViewList; }
    UINT& GetVertexBufferBindings() { return m_VertexBufferBindings; }

    // Bytes needed for one CSubresourceBindings per subresource.
    static size_t CalcPreallocationSize(UINT SubresourceCount) { return sizeof(CSubresourceBindings) * SubresourceCount; }

private:
    friend class ImmediateContext;
    friend class Resource;

    const UINT m_BindFlags;
    PreallocatedArray m_SubresourceBindings;
    UINT m_NumViewsReferencingSubresources;

    LIST_ENTRY m_ShaderResourceViewList;
    LIST_ENTRY m_RenderTargetViewList;
    LIST_ENTRY m_UnorderedAccessViewList;

    std::bitset m_ConstantBufferBindings[ShaderStageCount];
    UINT m_ConstantBufferBindRefs = 0;
    UINT m_VertexBufferBindings; // bitfield
    UINT m_StreamOutBindings : 4; // bitfield
    bool m_bIsDepthStencilViewBound;
    bool m_bIsIndexBufferBound;
};

// Static Bound/Unbound hooks for vertex buffers; defined out of line.
struct VBBinder
{
    static void Bound(Resource* pBuffer, UINT slot, EShaderStage stage);
    static void Unbound(Resource* pBuffer, UINT slot, EShaderStage stage);
};
// Static Bound/Unbound hooks for index buffers; defined out of line.
struct IBBinder
{
    static void Bound(Resource* pBuffer, UINT slot, EShaderStage stage);
    static void Unbound(Resource* pBuffer, UINT slot, EShaderStage stage);
};
// Static Bound/Unbound hooks for stream output buffers; defined out of line.
struct SOBinder
{
    static void Bound(Resource* pBuffer, UINT slot, EShaderStage stage);
    static void Unbound(Resource* pBuffer, UINT slot, EShaderStage stage);
};

//----------------------------------------------------------------------------------------------------------------------------------
// Binding helpers
// Tracks dirty bits, calls Bound/Unbound functions on binding changes,
// and tracks binding data from shader decls to allow binding typed/additional NULLs
//----------------------------------------------------------------------------------------------------------------------------------
// Base class
// Holds a fixed-size array of bound object pointers, one dirty bit per slot, and a high-water mark (m_NumBound).
// NOTE(review): template parameter/argument lists appear to be missing from this copy of the header
// (`template class ...`, `std::bitset`, `: public CBoundState`) - restore them from upstream before building.
template class CBoundState
{
public:
    static const UINT NumBindings = NumBindSlots;

public:
    CBoundState() = default;

    bool DirtyBitsUpTo(_In_range_(0, NumBindings) UINT slot) const noexcept;
    void SetDirtyBit(_In_range_(0, NumBindings - 1) UINT slot) noexcept { m_DirtyBits.set(slot); }
    void SetDirtyBits(std::bitset const& bits) noexcept { m_DirtyBits |= bits; }

    TBindable* const* GetBound() const noexcept { return m_Bound; }
    void ResetDirty(UINT slot) noexcept { m_DirtyBits.set(slot, false); }

    _Ret_range_(0, NumBindings) UINT GetNumBound() const noexcept { return m_NumBound; }

    void ReassertResourceState() const noexcept;

    // Stores pBindable in the given slot.
    // Returns true (and marks the slot dirty) only when the stored pointer actually changed.
    bool UpdateBinding(_In_range_(0, NumBindings - 1) UINT slot, _In_opt_ TBindable* pBindable) noexcept
    {
        auto& Current = m_Bound[slot];
        if (pBindable)
        {
            // Binding a non-null object can only raise the high-water mark.
            m_NumBound = max(m_NumBound, slot + 1);
        }
        if (Current != pBindable)
        {
            Current = pBindable;
            if (!pBindable)
            {
                // Unbinding may have emptied the top slot(s); pull the high-water mark back down.
                TrimNumBound();
            }
            m_DirtyBits.set(slot);
            return true;
        }
        return false;
    }

    // Unbinds every slot below the high-water mark. m_NumBound is re-read each iteration and
    // may shrink as UpdateBinding trims trailing null slots.
    void Clear()
    {
        for (UINT i = 0; i < m_NumBound; ++i)
        {
            UpdateBinding(i, nullptr);
        }
    }

protected:
    // Lowers m_NumBound until it is zero or the slot just below it is non-null.
    void TrimNumBound()
    {
        while (m_NumBound > 0 && !m_Bound[m_NumBound - 1])
        {
            --m_NumBound;
        }
    }

protected:
    TBindable* m_Bound[NumBindings] = {};
    std::bitset m_DirtyBits;
    _Field_range_(0, NumBindings) UINT m_NumBound = 0;
};

//----------------------------------------------------------------------------------------------------------------------------------
// Non-shader-visible (RTV, DSV, VB, IB, SO)
template class CSimpleBoundState : public CBoundState
{
public:
    CSimpleBoundState() = default;

    bool UpdateBinding(_In_range_(0, NumBindings - 1) UINT slot, _In_opt_ TBindable* pBindable, EShaderStage stage) noexcept;

    // Whole-state dirty tracking: any bit set / clear all bits.
    bool IsDirty() const { return this->m_DirtyBits.any(); }
    void ResetDirty() { this->m_DirtyBits.reset(); }

    // Unbinds every slot below the high-water mark via the stage-aware UpdateBinding.
    void Clear(EShaderStage shader)
    {
        for (UINT i = 0; i < this->m_NumBound; ++i)
        {
            UpdateBinding(i, nullptr, shader);
        }
    }
};

//----------------------------------------------------------------------------------------------------------------------------------
// SRV, UAV
template class CViewBoundState : public CBoundState
{
public:
    typedef TDeclVector::value_type NullType;
    typedef D3D12_CPU_DESCRIPTOR_HANDLE Descriptor;
    // Null type used for slots that have no recorded shader declaration.
    static const NullType c_AnyNull = RESOURCE_DIMENSION::UNKNOWN;

public:
    CViewBoundState() noexcept(false)
        : CBoundState()
    {
        m_ShaderData.reserve(this->NumBindings); // throw( bad_alloc )
    }

    bool UpdateBinding(_In_range_(0, NumBindings - 1) UINT slot, _In_opt_ TBindable* pBindable, EShaderStage stage) noexcept;
    bool IsDirty(TDeclVector const& New, UINT rootSignatureBucketSize, bool bKnownDirty) noexcept;

    // Returns the recorded entry of m_ShaderData for the slot, or c_AnyNull when the slot is past its end.
    NullType GetNullType(_In_range_(0, NumBindings - 1) UINT slot) const noexcept
    {
        if (slot >= m_ShaderData.size())
            return c_AnyNull;
        return m_ShaderData[slot];
    }

    // Writes one descriptor per slot for the first RootSignatureHWM slots and clears each slot's dirty bit.
    // Bound slots use the view's refreshed descriptor handle; unbound slots use the null descriptor
    // selected by that slot's null type.
    void FillDescriptors(_Out_writes_(NumBindings) Descriptor* pDescriptors,
        _In_reads_(D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBEARRAY) Descriptor* pNullDescriptors,
        _In_range_(0, NumBindings) UINT RootSignatureHWM) noexcept
    {
        for (UINT i = 0; i < RootSignatureHWM; ++i)
        {
            if (this->m_Bound[i])
            {
                pDescriptors[i] = this->m_Bound[i]->GetRefreshedDescriptorHandle();
            }
            else
            {
                pDescriptors[i] = pNullDescriptors[(UINT)GetNullType(i)];
            }
            this->m_DirtyBits.set(i, false);
        }
    }

    // Unbinds all NumBindSlots slots (not just those below the high-water mark).
    void Clear(EShaderStage shader)
    {
        for (UINT i = 0; i < NumBindSlots; ++i)
        {
            UpdateBinding(i, nullptr, shader);
        }
    }

protected:
    TDeclVector m_ShaderData;
};

//----------------------------------------------------------------------------------------------------------------------------------
class CConstantBufferBoundState : public CBoundState
{
public:
    CConstantBufferBoundState() noexcept
        : CBoundState()
    {
    }

    bool UpdateBinding(_In_range_(0, NumBindings - 1) UINT slot, _In_opt_ Resource* pBindable, EShaderStage stage) noexcept;

    // Reports dirty when the bucket size exceeds the previously recorded one or DirtyBitsUpTo() says so.
    // Always records the new bucket size in m_ShaderData.
    bool IsDirty(_In_range_(0, NumBindings) UINT rootSignatureBucketSize) noexcept
    {
        bool bDirty = rootSignatureBucketSize > m_ShaderData || DirtyBitsUpTo(rootSignatureBucketSize);
        m_ShaderData = rootSignatureBucketSize;
        return bDirty;
    }

    // Unbinds all NumBindings slots.
    void Clear(EShaderStage shader)
    {
        for (UINT i = 0; i < NumBindings; ++i)
        {
            UpdateBinding(i, nullptr, shader);
        }
    }

protected:
    // Bucket size passed to the most recent IsDirty() call.
    _Field_range_(0, NumBindings) UINT m_ShaderData = 0;
};

//----------------------------------------------------------------------------------------------------------------------------------
class CSamplerBoundState : public CBoundState
{
public:
    typedef D3D12_CPU_DESCRIPTOR_HANDLE Descriptor;

public:
    CSamplerBoundState() noexcept
        : CBoundState()
    {
    }

    // NOTE(review): pBindable is annotated _In_ (non-null), yet Clear() below passes nullptr and
    // FillDescriptors() handles null slots - _In_opt_ looks intended; confirm against the definition.
    bool UpdateBinding(_In_range_(0, NumBindings - 1) UINT slot, _In_ Sampler* pBindable) noexcept;

    // Reports dirty when the bucket size exceeds the previously recorded one or DirtyBitsUpTo() says so.
    // Always records the new bucket size in m_ShaderData.
    bool IsDirty(_In_range_(0, NumBindings) UINT rootSignatureBucketSize) noexcept
    {
        bool bDirty = rootSignatureBucketSize > m_ShaderData || DirtyBitsUpTo(rootSignatureBucketSize);
        m_ShaderData = rootSignatureBucketSize;
        return bDirty;
    }

    // Writes one descriptor per slot for the first RootSignatureHWM slots (the sampler's descriptor,
    // or *pNullDescriptor for empty slots) and clears each slot's dirty bit.
    void FillDescriptors(_Out_writes_(NumBindings) Descriptor* pDescriptors, Descriptor* pNullDescriptor,
        _In_range_(0, NumBindings) UINT RootSignatureHWM) noexcept
    {
        for (UINT i = 0; i < RootSignatureHWM; ++i)
        {
            pDescriptors[i] = (m_Bound[i]) ? m_Bound[i]->m_Descriptor : *pNullDescriptor;
            m_DirtyBits.set(i, false);
        }
    }

    // Unbinds all NumBindings slots.
    void Clear()
    {
        for (UINT i = 0; i < NumBindings; ++i)
        {
            UpdateBinding(i, nullptr);
        }
    }

protected:
    // Bucket size passed to the most recent IsDirty() call.
    _Field_range_(0, NumBindings) UINT m_ShaderData = 0;
};

};

================================================
FILE: include/ResourceCache.hpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once namespace D3D12TranslationLayer { struct ResourceCacheEntry { unique_comptr m_Resource; std::unique_ptr m_RTV; std::unique_ptr m_SRV; }; class ResourceCache { public: ResourceCache(ImmediateContext &device); ResourceCacheEntry const& GetResource(DXGI_FORMAT format, UINT width, UINT height, DXGI_FORMAT viewFormat = DXGI_FORMAT_UNKNOWN); void TakeCacheEntryOwnership(DXGI_FORMAT format, ResourceCacheEntry& entryOut); typedef std::map DXGIMap; private: DXGIMap m_Cache; ImmediateContext &m_device; }; } ================================================ FILE: include/ResourceState.hpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once namespace D3D12TranslationLayer { class CommandListManager; // These are defined in the private d3d12 header #define UNKNOWN_RESOURCE_STATE (D3D12_RESOURCE_STATES)0x8000u #define RESOURCE_STATE_VALID_BITS 0x2f3fff #define RESOURCE_STATE_VALID_INTERNAL_BITS 0x2fffff constexpr D3D12_RESOURCE_STATES RESOURCE_STATE_ALL_WRITE_BITS = D3D12_RESOURCE_STATE_RENDER_TARGET | D3D12_RESOURCE_STATE_UNORDERED_ACCESS | D3D12_RESOURCE_STATE_DEPTH_WRITE | D3D12_RESOURCE_STATE_STREAM_OUT | D3D12_RESOURCE_STATE_COPY_DEST | D3D12_RESOURCE_STATE_RESOLVE_DEST | D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE | D3D12_RESOURCE_STATE_VIDEO_PROCESS_WRITE; enum class SubresourceTransitionFlags { None = 0, TransitionPreDraw = 1, NoBindingTransitions = 2, StateMatchExact = 4, ForceExclusiveState = 8, NotUsedInCommandListIfNoStateChange = 0x10, }; DEFINE_ENUM_FLAG_OPERATORS(SubresourceTransitionFlags); inline bool IsD3D12WriteState(UINT State, SubresourceTransitionFlags Flags) { return (State & RESOURCE_STATE_ALL_WRITE_BITS) != 0 || (Flags & SubresourceTransitionFlags::ForceExclusiveState) != SubresourceTransitionFlags::None; } //================================================================================================================================== // CDesiredResourceState 
// Stores the current desired state of either an entire resource, or each subresource. //================================================================================================================================== class CDesiredResourceState { public: struct SubresourceInfo { D3D12_RESOURCE_STATES State = UNKNOWN_RESOURCE_STATE; COMMAND_LIST_TYPE CommandListType = COMMAND_LIST_TYPE::UNKNOWN; SubresourceTransitionFlags Flags = SubresourceTransitionFlags::None; }; private: bool m_bAllSubresourcesSame = true; PreallocatedInlineArray m_spSubresourceInfo; public: static size_t CalcPreallocationSize(UINT SubresourceCount) { return sizeof(SubresourceInfo) * (SubresourceCount - 1); } CDesiredResourceState(UINT SubresourceCount, void*& pPreallocatedMemory) noexcept : m_spSubresourceInfo(SubresourceCount, pPreallocatedMemory) // throw( bad_alloc ) { } bool AreAllSubresourcesSame() const noexcept { return m_bAllSubresourcesSame; } SubresourceInfo const& GetSubresourceInfo(UINT SubresourceIndex) const noexcept; void SetResourceState(SubresourceInfo const& Info) noexcept; void SetSubresourceState(UINT SubresourceIndex, SubresourceInfo const& Info) noexcept; void Reset() noexcept; }; //================================================================================================================================== // CCurrentResourceState // Stores the current state of either an entire resource, or each subresource. // Current state can either be shared read across multiple queues, or exclusive on a single queue. 
//==================================================================================================================================
// NOTE(review): template arguments appear to be missing from this copy of the header
// (`static_cast(...)`, `PreallocatedInlineArray`, `std::shared_ptr`) - restore them from upstream before building.
class CCurrentResourceState
{
public:
    static constexpr unsigned NumCommandListTypes = static_cast(COMMAND_LIST_TYPE::MAX_VALID);

    // State of a (sub)resource owned by a single command list type.
    struct ExclusiveState
    {
        UINT64 FenceValue = 0;
        D3D12_RESOURCE_STATES State = D3D12_RESOURCE_STATE_COMMON;
        COMMAND_LIST_TYPE CommandListType = COMMAND_LIST_TYPE::UNKNOWN;

        // There are cases where we want to synchronize against last write, instead
        // of last access. Therefore the exclusive state of a (sub)resource is not
        // overwritten when transitioning to shared state, simply marked as stale.
        // So Map(READ) always synchronizes against the most recent exclusive state,
        // while Map(WRITE) always synchronizes against the most recent state, whether
        // it's exclusive or shared.
        bool IsMostRecentlyExclusiveState = true;
    };

    // Per-command-list-type fence value and state for a (sub)resource in shared state.
    struct SharedState
    {
        UINT64 FenceValues[NumCommandListTypes] = {};
        D3D12_RESOURCE_STATES State[NumCommandListTypes] = {};
    };

private:
    const bool m_bSimultaneousAccess;
    bool m_bAllSubresourcesSame = true;

    // Note: As a (minor) memory optimization, using a contiguous block of memory for exclusive + shared state.
    // The memory is owned by the exclusive state pointer. The shared state pointer is non-owning and possibly null.
    PreallocatedInlineArray m_spExclusiveState;
    PreallocatedInlineArray m_pSharedState;

    void ConvertToSubresourceTracking() noexcept;

public:
    // Bytes of preallocated storage requested: (SubresourceCount - 1) entries, each sized for an
    // ExclusiveState plus, when bSimultaneousAccess is set, a SharedState.
    static size_t CalcPreallocationSize(UINT SubresourceCount, bool bSimultaneousAccess)
    {
        return (sizeof(ExclusiveState) + (bSimultaneousAccess ? sizeof(SharedState) : 0u)) * (SubresourceCount - 1);
    }

    CCurrentResourceState(UINT SubresourceCount, bool bSimultaneousAccess, void*& pPreallocatedMemory) noexcept;

    bool SupportsSimultaneousAccess() const noexcept { return m_bSimultaneousAccess; }
    bool AreAllSubresourcesSame() const noexcept { return m_bAllSubresourcesSame; }

    bool IsExclusiveState(UINT SubresourceIndex) const noexcept;

    // Whole-resource setters.
    void SetExclusiveResourceState(ExclusiveState const& State) noexcept;
    void SetSharedResourceState(COMMAND_LIST_TYPE Type, UINT64 FenceValue, D3D12_RESOURCE_STATES State) noexcept;

    // Single-subresource setters.
    void SetExclusiveSubresourceState(UINT SubresourceIndex, ExclusiveState const& State) noexcept;
    void SetSharedSubresourceState(UINT SubresourceIndex, COMMAND_LIST_TYPE Type, UINT64 FenceValue, D3D12_RESOURCE_STATES State) noexcept;

    ExclusiveState const& GetExclusiveSubresourceState(UINT SubresourceIndex) const noexcept;
    SharedState const& GetSharedSubresourceState(UINT SubresourceIndex) const noexcept;

    // Command list type masks for the whole resource, a subresource subset, or one subresource.
    UINT GetCommandListTypeMask() const noexcept;
    UINT GetCommandListTypeMask(CViewSubresourceSubset const& Subresources) const noexcept;
    UINT GetCommandListTypeMask(UINT Subresource) const noexcept;

    void Reset() noexcept;
};

//==================================================================================================================================
// DeferredWait
//==================================================================================================================================
// Pairs a fence with the fence value to wait for.
struct DeferredWait
{
    std::shared_ptr fence;
    UINT64 value;
};

//==================================================================================================================================
// TransitionableResourceBase
// A base class that transitionable resources should inherit from.
//================================================================================================================================== struct TransitionableResourceBase { LIST_ENTRY m_TransitionListEntry; CDesiredResourceState m_DesiredState; const bool m_bTriggersSwapchainDeferredWaits; std::vector m_ResourceDeferredWaits; static size_t CalcPreallocationSize(UINT NumSubresources) { return CDesiredResourceState::CalcPreallocationSize(NumSubresources); } TransitionableResourceBase(UINT NumSubresources, bool bTriggersDeferredWaits, void*& pPreallocatedMemory) noexcept : m_DesiredState(NumSubresources, pPreallocatedMemory) , m_bTriggersSwapchainDeferredWaits(bTriggersDeferredWaits) { D3D12TranslationLayer::InitializeListHead(&m_TransitionListEntry); } ~TransitionableResourceBase() noexcept { if (IsTransitionPending()) { D3D12TranslationLayer::RemoveEntryList(&m_TransitionListEntry); } } bool IsTransitionPending() const noexcept { return !D3D12TranslationLayer::IsListEmpty(&m_TransitionListEntry); } void AddDeferredWaits(const std::vector& DeferredWaits) noexcept(false) { m_ResourceDeferredWaits.insert(m_ResourceDeferredWaits.end(), DeferredWaits.begin(), DeferredWaits.end()); // throw( bad_alloc ) } }; //================================================================================================================================== // ResourceStateManagerBase // The main business logic for handling resource transitions, including multi-queue sync and shared/exclusive state changes. // // Requesting a resource to transition simply updates destination state, and ensures it's in a list to be processed later. // // When processing ApplyAllResourceTransitions, we build up sets of vectors. // There's a source one for each command list type, and a single one for the dest because we are applying // the resource transitions for a single operation. 
// There's also a vector for "tentative" barriers, which are merged into the destination vector if
// no flushing occurs as a result of submitting the final barrier operation.
// 99% of the time, there will only be the source being populated, but sometimes there will be a destination as well.
// If the source and dest of a transition require different types, we put a (source->COMMON) in the appropriate source vector,
// and a (COMMON->dest) in the destination vector.
//
// Once all resources are processed, we:
// 1. Submit all source barriers, except ones belonging to the destination queue.
// 2. Flush all source command lists, except ones belonging to the destination queue.
// 3. Determine if the destination queue is going to be flushed.
//    If so: Submit source barriers on that command list first, then flush it.
//    If not: Accumulate source, dest, and tentative barriers so they can be sent to D3D12 in a single API call.
// 4. Insert waits on the destination queue - deferred waits, and waits for work on other queues.
// 5. Insert destination barriers.
//
// Only once all of this has been done do we update the "current" state of resources,
// because this is the only way that we know whether or not the destination queue has been flushed,
// and therefore, we can get the correct fence values to store in the subresources.
//
// NOTE(review): template parameter/argument lists appear to be missing from this copy of the header
// (`std::vector`, `std::function`, `std::shared_ptr`, `std::declval()`, `template void ...`) -
// restore them from upstream before building.
//==================================================================================================================================
class ResourceStateManagerBase
{
protected:
    LIST_ENTRY m_TransitionListHead;

    std::vector m_SwapchainDeferredWaits;
    std::vector m_ResourceDeferredWaits;

    // State that is reset during the preamble, accumulated during resource traversal,
    // and applied during the submission phase.
    enum class PostApplyExclusiveState
    {
        Exclusive,
        Shared,
        SharedIfFlushed
    };
    // One pending "current state" update, consumed by PostSubmitUpdateState().
    struct PostApplyUpdate
    {
        TransitionableResourceBase& AffectedResource;
        CCurrentResourceState& CurrentState;
        UINT SubresourceIndex;
        D3D12_RESOURCE_STATES NewState;
        PostApplyExclusiveState ExclusiveState;
        bool WasTransitioningToDestinationType;
    };
    std::vector m_vSrcResourceBarriers[(UINT)COMMAND_LIST_TYPE::MAX_VALID];
    std::vector m_vDstResourceBarriers;
    std::vector m_vTentativeResourceBarriers;
    std::vector m_vPostApplyUpdates;
    COMMAND_LIST_TYPE m_DestinationCommandListType;
    bool m_bApplySwapchainDeferredWaits;
    bool m_bFlushQueues[(UINT)COMMAND_LIST_TYPE::MAX_VALID];
    UINT64 m_QueueFenceValuesToWaitOn[(UINT)COMMAND_LIST_TYPE::MAX_VALID];

    UINT64 m_InsertedQueueSync[(UINT)COMMAND_LIST_TYPE::MAX_VALID][(UINT)COMMAND_LIST_TYPE::MAX_VALID] = {};

    ResourceStateManagerBase() noexcept(false);

    ~ResourceStateManagerBase() noexcept
    {
        // All resources should be gone by this point, and each resource ensures it is no longer in this list.
        assert(D3D12TranslationLayer::IsListEmpty(&m_TransitionListHead));
    }

    // These methods set the destination state of the resource/subresources and ensure it's in the transition list.
    void TransitionResource(TransitionableResourceBase& Resource,
                            CDesiredResourceState::SubresourceInfo const& State) noexcept;
    void TransitionSubresources(TransitionableResourceBase& Resource,
                                CViewSubresourceSubset const& Subresources,
                                CDesiredResourceState::SubresourceInfo const& State) noexcept;
    void TransitionSubresource(TransitionableResourceBase& Resource,
                               UINT SubresourceIndex,
                               CDesiredResourceState::SubresourceInfo const& State) noexcept;

    // Deferred waits are inserted when a transition is processing that puts applicable resources
    // into a write state. The command list is flushed, and these waits are inserted before the barriers.
    void AddDeferredWait(std::shared_ptr const& spFence, UINT64 Value) noexcept(false);

    // Clear out any state from previous iterations.
    void ApplyResourceTransitionsPreamble() noexcept;

    // What to do with the resource, in the context of the transition list, after processing it.
    enum class TransitionResult
    {
        // There are no more pending transitions that may be processed at a later time (i.e. draw time),
        // so remove it from the pending transition list.
        Remove,

        // There are more transitions to be done, so keep it in the list.
        Keep
    };

    // For every entry in the transition list, call a routine.
    // This routine must return a TransitionResult which indicates what to do with the list.
    template void ForEachTransitioningResource(TFunc&& func)
        noexcept(noexcept(func(std::declval())))
    {
        for (LIST_ENTRY *pListEntry = m_TransitionListHead.Flink; pListEntry != &m_TransitionListHead;)
        {
            // Recover the owning resource from its embedded list entry.
            TransitionableResourceBase* pResource = CONTAINING_RECORD(pListEntry, TransitionableResourceBase, m_TransitionListEntry);
            TransitionResult result = func(*pResource);

            // Capture the successor before the current entry is (possibly) unlinked below.
            auto pNextListEntry = pListEntry->Flink;
            if (result == TransitionResult::Remove)
            {
                D3D12TranslationLayer::RemoveEntryList(pListEntry);
                // Re-initialize the unlinked entry as an empty list head
                // (presumably so the resource no longer reports a pending transition - confirm).
                D3D12TranslationLayer::InitializeListHead(pListEntry);
            }
            pListEntry = pNextListEntry;
        }
    }

    // Updates vectors with the operations that should be applied to the requested resource.
    // May update the destination state of the resource.
    TransitionResult ProcessTransitioningResource(ID3D12Resource* pTransitioningResource,
                                                  TransitionableResourceBase& TransitionableResource,
                                                  CCurrentResourceState& CurrentState,
                                                  CResourceBindings& BindingState,
                                                  UINT NumTotalSubresources,
                                                  _In_reads_((UINT)COMMAND_LIST_TYPE::MAX_VALID) const UINT64* CurrentFenceValues,
                                                  bool bIsPreDraw) noexcept(false);

    // This method is templated so that it can have the same code for two implementations without copy/paste.
    // It is not intended to be completely extensible. One implementation is leveraged by the translation layer
    // itself using lambdas that can be inlined. The other uses std::functions which cannot and is intended for tests.
    template <
        typename TSubmitBarriersImpl,
        typename TSubmitCmdListImpl,
        typename THasCommandsImpl,
        typename TGetCurrentFenceImpl,
        typename TInsertDeferredWaitsImpl,
        typename TInsertQueueWaitImpl
    > void SubmitResourceTransitionsImpl(TSubmitBarriersImpl&&,
                                         TSubmitCmdListImpl&&,
                                         THasCommandsImpl&&,
                                         TGetCurrentFenceImpl&&,
                                         TInsertDeferredWaitsImpl&&,
                                         TInsertQueueWaitImpl&&);

    // Call the D3D12 APIs to perform the resource barriers, command list submission, and command queue sync
    // that was determined by previous calls to ProcessTransitioningResource.
    void SubmitResourceTransitions(_In_reads_((UINT)COMMAND_LIST_TYPE::MAX_VALID) CommandListManager** ppManagers) noexcept(false);

    // Call the callbacks provided to allow tests to inspect what D3D12 APIs would've been called
    // as part of finalizing a set of barrier operations.
    void SimulateSubmitResourceTransitions(std::function&, COMMAND_LIST_TYPE)> SubmitBarriers,
                                           std::function SubmitCmdList,
                                           std::function HasCommands,
                                           std::function GetCurrentFenceImpl,
                                           std::function InsertDeferredWaits,
                                           std::function InsertQueueWait);

    // Update the current state of resources now that all barriers and sync have been done.
    // Callback provided to allow this information to be used by concrete implementation as well.
    // The callback receives (update, command list type, fence value) for every update that was applied;
    // it is not invoked for updates that are skipped.
    template void PostSubmitUpdateState(TFunc&& callback,
                                        _In_reads_((UINT)COMMAND_LIST_TYPE::MAX_VALID) const UINT64* PreviousFenceValues,
                                        UINT64 NewFenceValue)
    {
        for (auto& update : m_vPostApplyUpdates)
        {
            COMMAND_LIST_TYPE UpdateCmdListType = m_DestinationCommandListType;
            UINT64 UpdateFenceValue = NewFenceValue;
            // The destination queue counts as flushed when its fence value differs from the previous one.
            bool Flushed = m_DestinationCommandListType != COMMAND_LIST_TYPE::UNKNOWN &&
                NewFenceValue != PreviousFenceValues[(UINT)m_DestinationCommandListType];

            if (update.ExclusiveState == PostApplyExclusiveState::Exclusive ||
                (update.ExclusiveState == PostApplyExclusiveState::SharedIfFlushed && !Flushed))
            {
                // Record an exclusive state (SharedIfFlushed stays exclusive when no flush occurred).
                CCurrentResourceState::ExclusiveState NewExclusiveState = {};
                if (update.WasTransitioningToDestinationType)
                {
                    NewExclusiveState.CommandListType = m_DestinationCommandListType;
                    NewExclusiveState.FenceValue = NewFenceValue;
                }
                else
                {
                    // Ownership stays with the previous command list type; use that type's previous fence value.
                    // NOTE(review): assumes OldExclusiveState.CommandListType is a valid index into
                    // PreviousFenceValues (i.e. not UNKNOWN) - confirm against callers.
                    auto& OldExclusiveState = update.CurrentState.GetExclusiveSubresourceState(update.SubresourceIndex);
                    UpdateCmdListType = NewExclusiveState.CommandListType = OldExclusiveState.CommandListType;
                    UpdateFenceValue = NewExclusiveState.FenceValue = PreviousFenceValues[(UINT)OldExclusiveState.CommandListType];
                }
                NewExclusiveState.State = update.NewState;
                if (update.SubresourceIndex == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES)
                {
                    update.CurrentState.SetExclusiveResourceState(NewExclusiveState);
                }
                else
                {
                    update.CurrentState.SetExclusiveSubresourceState(update.SubresourceIndex, NewExclusiveState);
                }
            }
            else if (update.WasTransitioningToDestinationType)
            {
                // Record a shared state on the destination command list type.
                if (update.SubresourceIndex == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES)
                {
                    update.CurrentState.SetSharedResourceState(m_DestinationCommandListType, NewFenceValue, update.NewState);
                }
                else
                {
                    update.CurrentState.SetSharedSubresourceState(update.SubresourceIndex, m_DestinationCommandListType, NewFenceValue, update.NewState);
                }
            }
            else
            {
                // Shared update that was not going to the destination type: nothing to record, no callback.
                continue;
            }
            callback(update, UpdateCmdListType, UpdateFenceValue);
        }
    }

private:
    // Helpers
    static bool TransitionRequired(D3D12_RESOURCE_STATES CurrentState, D3D12_RESOURCE_STATES& DestinationState, SubresourceTransitionFlags Flags) noexcept;
    void AddCurrentStateUpdate(TransitionableResourceBase& Resource,
                               CCurrentResourceState& CurrentState,
                               UINT SubresourceIndex,
                               D3D12_RESOURCE_STATES NewState,
                               PostApplyExclusiveState ExclusiveState,
                               bool IsGoingToDestinationType) noexcept(false);
    void ProcessTransitioningSubresourceExclusive(CCurrentResourceState& CurrentState,
                                                  UINT i,
                                                  COMMAND_LIST_TYPE curCmdListType,
                                                  _In_reads_((UINT)COMMAND_LIST_TYPE::MAX_VALID) const UINT64* CurrentFenceValues,
                                                  CDesiredResourceState::SubresourceInfo& SubresourceDestinationInfo,
                                                  D3D12_RESOURCE_STATES after,
                                                  TransitionableResourceBase& TransitionableResource,
                                                  D3D12_RESOURCE_BARRIER& TransitionDesc,
                                                  SubresourceTransitionFlags Flags) noexcept(false);
    void ProcessTransitioningSubresourceShared(CCurrentResourceState& CurrentState,
                                               UINT i,
                                               D3D12_RESOURCE_STATES after,
                                               SubresourceTransitionFlags Flags,
                                               _In_reads_((UINT)COMMAND_LIST_TYPE::MAX_VALID) const UINT64* CurrentFenceValues,
                                               COMMAND_LIST_TYPE curCmdListType,
                                               D3D12_RESOURCE_BARRIER& TransitionDesc,
                                               TransitionableResourceBase& TransitionableResource) noexcept(false);
    void SubmitResourceBarriers(_In_reads_(Count) D3D12_RESOURCE_BARRIER const* pBarriers, UINT Count, _In_ CommandListManager* pManager) noexcept;
};

//==================================================================================================================================
// ResourceStateManager
// The implementation of state management tailored to the ImmediateContext and Resource classes.
//==================================================================================================================================
class ResourceStateManager : public ResourceStateManagerBase
{
private:
    ImmediateContext& m_ImmCtx;

public:
    ResourceStateManager(ImmediateContext& ImmCtx)
        : m_ImmCtx(ImmCtx)
    {
    }

    // *** NOTE: DEFAULT DESTINATION IS GRAPHICS, NOT INFERRED FROM STATE BITS. ***

    // Transition the entire resource to a particular destination state on a particular command list.
    void TransitionResource(Resource* pResource,
                            D3D12_RESOURCE_STATES State,
                            COMMAND_LIST_TYPE DestinationCommandListType = COMMAND_LIST_TYPE::GRAPHICS,
                            SubresourceTransitionFlags Flags = SubresourceTransitionFlags::None) noexcept;
    // Transition a set of subresources to a particular destination state. Fast-path provided when subset covers entire resource.
    void TransitionSubresources(Resource* pResource,
                                CViewSubresourceSubset const& Subresources,
                                D3D12_RESOURCE_STATES State,
                                COMMAND_LIST_TYPE DestinationCommandListType = COMMAND_LIST_TYPE::GRAPHICS,
                                SubresourceTransitionFlags Flags = SubresourceTransitionFlags::None) noexcept;
    // Transition a single subresource to a particular destination state.
    void TransitionSubresource(Resource* pResource,
                               UINT SubresourceIndex,
                               D3D12_RESOURCE_STATES State,
                               COMMAND_LIST_TYPE DestinationCommandListType = COMMAND_LIST_TYPE::GRAPHICS,
                               SubresourceTransitionFlags Flags = SubresourceTransitionFlags::None) noexcept;

    // Update destination state of a resource to correspond to the resource's bind points.
    void TransitionResourceForBindings(Resource* pResource) noexcept;
    // Update destination state of specified subresources to correspond to the resource's bind points.
    void TransitionSubresourcesForBindings(Resource* pResource,
                                           CViewSubresourceSubset const& Subresources) noexcept;

    // Submit all barriers and queue sync.
    void ApplyAllResourceTransitions(bool bIsPreDraw = false) noexcept(false);

    using ResourceStateManagerBase::AddDeferredWait;
};

};

================================================
FILE: include/RootSignature.hpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
// NOTE(review): the header name of the following #include, and the template parameter/argument lists in
// this file (`std::vector`, `static_cast(...)`, `reinterpret_cast(...)`, `std::array`, `DeviceChildImpl`,
// `std::hash`, `template static ...`), appear to be missing from this copy - restore them from upstream.
#include

namespace D3D12TranslationLayer
{
#define ROOT_SIGNATURE_FLAG_ALLOW_LOW_TIER_RESERVED_HW_CB_LIMIT ((D3D12_ROOT_SIGNATURE_FLAGS)0x80000000)

// A versioned root signature description together with the parameter and descriptor-range
// storage declared alongside it.
struct VersionedRootSignatureDescWithStorage
{
    static constexpr UINT c_NumParameters = 16; // (CBV, SRV, Sampler) * 5 shader stages + UAV
    CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC RootDesc{ 0, (D3D12_ROOT_PARAMETER1*)nullptr };
    CD3DX12_ROOT_PARAMETER1 Parameter[c_NumParameters];
    static constexpr UINT c_NumExtraRangesForInterfaces = 15; // (CBs in space 1, samplers in spaces 1 and 2) * 5 shader stages
    CD3DX12_DESCRIPTOR_RANGE1 DescriptorRanges[c_NumParameters + c_NumExtraRangesForInterfaces];
    std::vector InterfacesSRVRanges;

    VersionedRootSignatureDescWithStorage() = default;
    // Noncopyable, non-movable due to internal pointers.
    VersionedRootSignatureDescWithStorage(VersionedRootSignatureDescWithStorage const&) = delete;
    VersionedRootSignatureDescWithStorage(VersionedRootSignatureDescWithStorage&&) = delete;
    VersionedRootSignatureDescWithStorage& operator=(VersionedRootSignatureDescWithStorage const&) = delete;
    VersionedRootSignatureDescWithStorage& operator=(VersionedRootSignatureDescWithStorage&&) = delete;
};

// Compact description of a root signature: binding counts are quantized into "buckets" per shader stage.
// The leading bytes (stages, UAV bucket, flags) double as a 64-bit key - see GetAsUINT64() and the
// static_assert that follows this struct.
struct RootSignatureDesc
{
    static_assert(D3D12_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT == 14, "Validation constant.");
    static_assert(D3D12_COMMONSHADER_CONSTANT_BUFFER_HW_SLOT_COUNT == 15, "Validation constant.");
    static constexpr UINT8 c_CBBuckets = 4; // 4, 8, 14, 15
    static_assert(D3D12_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT == 128, "Validating constant.");
    static constexpr UINT8 c_SRVBuckets = 6; // 4, 8, 16, 32, 64, 128
    static_assert(D3D12_COMMONSHADER_SAMPLER_SLOT_COUNT == 16, "Validating constant.");
    static constexpr UINT8 c_SamplerBuckets = 3; // 4, 8, 16
    static_assert(D3D12_UAV_SLOT_COUNT == 64, "Validating constant.");
    static constexpr UINT8 c_UAVBuckets = 5; // 4, 8, 16, 32, 64

    // Maps a binding count to a power-of-two bucket index: counts up to 4 map to bucket 0,
    // larger counts to (index of the highest set bit of BindingCount - 1) - 1.
    // NonCBBucketToBindingCount() gives the bucket's capacity.
    static UINT8 NonCBBindingCountToBucket(UINT BindingCount)
    {
        if (BindingCount <= 4)
        {
            return 0;
        }
        DWORD Bucket = 0;
        BitScanReverse(&Bucket, BindingCount - 1);
        // Bucket guaranteed to be at least 2 due to <= 4 check above.
        return static_cast(Bucket - 1);
    }
    // Maps a constant buffer count to its bucket: exactly 15 -> 3, more than 8 -> 2, more than 4 -> 1, else 0.
    static UINT8 CBBindingCountToBucket(UINT BindingCount)
    {
        if (BindingCount == 15)
            return 3;
        else if (BindingCount > 8)
            return 2;
        else if (BindingCount > 4)
            return 1;
        return 0;
    }
    // Capacity of a non-CB bucket: 4, 8, 16, ...
    static constexpr UINT NonCBBucketToBindingCount(UINT8 Bucket)
    {
        return 4 << Bucket;
    }
    // Capacity of a CB bucket, looked up in a fixed table; Bucket must be less than c_CBBuckets.
    static UINT CBBucketToBindingCount(UINT8 Bucket)
    {
        static constexpr UINT BindingCounts[c_CBBuckets] = { 4, 8, 14, 15 };
        return BindingCounts[Bucket];
    }

    // Per-stage bucket indices packed into bitfields totalling 8 bits.
    struct ShaderStage
    {
        UINT8 m_CBBucket : 2;
        UINT8 m_SamplerBucket : 2;
        UINT8 m_SRVBucket : 3;
        UINT8 m_UsesShaderInterfaces : 1;
        // Default: CB bucket 2 (14 bindings), largest sampler and SRV buckets, no interfaces.
        ShaderStage()
        {
            m_CBBucket = 2;
            m_SamplerBucket = c_SamplerBuckets - 1;
            m_SRVBucket = c_SRVBuckets - 1;
            m_UsesShaderInterfaces = 0;
        }
        // With a shader, buckets are derived from its declarations; a null shader keeps the defaults.
        ShaderStage(SShaderDecls const* pShader)
            : ShaderStage()
        {
            if (pShader)
            {
                m_CBBucket = CBBindingCountToBucket(pShader->m_NumCBs);
                m_SamplerBucket = NonCBBindingCountToBucket(pShader->m_NumSamplers);
                m_SRVBucket = NonCBBindingCountToBucket((UINT)pShader->m_ResourceDecls.size());
                m_UsesShaderInterfaces = pShader->m_bUsesInterfaces;
            }
        }
        ShaderStage(ShaderStage const&) = default;
        ShaderStage& operator=(ShaderStage const&) = default;
        UINT GetCBBindingCount() const { return CBBucketToBindingCount(m_CBBucket); }
        UINT GetSamplerBindingCount() const { return NonCBBucketToBindingCount(m_SamplerBucket); }
        UINT GetSRVBindingCount() const { return NonCBBucketToBindingCount(m_SRVBucket); }
        // True for CB bucket 3, i.e. the 15-binding bucket.
        bool IsCB14() const { return m_CBBucket == 3; }
    };
    ShaderStage m_ShaderStages[5]; // Only graphics stages are needed
    UINT8 m_UAVBucket;
    enum Flags : UINT16
    {
        Compute = 1,
        RequiresBufferOutOfBoundsHandling = 2,
        UsesShaderInterfaces = 4,
    };
    const Flags m_Flags;
    UINT m_NumSRVSpacesUsed[5];

    // Builds the flag set: Compute when N == 1, RequiresBufferOutOfBoundsHandling when requested,
    // and UsesShaderInterfaces when any non-null shader uses interfaces.
    template
    static Flags ComputeFlags(bool bRequiresBufferOutOfBoundsHandling, std::array const& shaders)
    {
        UINT flags = ((N == 1) ? Compute : 0) |
            (bRequiresBufferOutOfBoundsHandling ? RequiresBufferOutOfBoundsHandling : 0);
        for (SShaderDecls const* pShader : shaders)
        {
            if (pShader && pShader->m_bUsesInterfaces)
            {
                flags |= UsesShaderInterfaces;
                break;
            }
        }
        return (Flags)flags;
    }

    // Graphics description. Note the storage order is PS, VS, GS, HS, DS (PS first), which differs
    // from the parameter order.
    RootSignatureDesc(SShaderDecls const* pVS, SShaderDecls const* pPS, SShaderDecls const* pGS, SShaderDecls const* pHS, SShaderDecls const* pDS, bool bRequiresBufferOutOfBoundsHandling)
        : m_ShaderStages{
            { ShaderStage(pPS) },
            { ShaderStage(pVS) },
            { ShaderStage(pGS) },
            { ShaderStage(pHS) },
            { ShaderStage(pDS) } }
        , m_UAVBucket(NonCBBindingCountToBucket(NumUAVBindings(pVS, pPS, pGS, pHS, pDS)))
        , m_Flags(ComputeFlags<5>(bRequiresBufferOutOfBoundsHandling, std::array{ pPS, pVS, pGS, pHS, pDS }))
        , m_NumSRVSpacesUsed{
            pPS ? pPS->m_NumSRVSpacesUsed : 1u,
            pVS ? pVS->m_NumSRVSpacesUsed : 1u,
            pGS ? pGS->m_NumSRVSpacesUsed : 1u,
            pHS ? pHS->m_NumSRVSpacesUsed : 1u,
            pDS ? pDS->m_NumSRVSpacesUsed : 1u }
    {
    }
    // Compute description. Only element 0 of the per-stage arrays is given an explicit initializer.
    RootSignatureDesc(SShaderDecls const* pCS, bool bRequiresBufferOutOfBoundsHandling)
        : m_ShaderStages{ { ShaderStage(pCS) } }
        , m_UAVBucket(NonCBBindingCountToBucket(pCS ? (UINT)pCS->m_UAVDecls.size() : 0u))
        , m_Flags(ComputeFlags<1>(bRequiresBufferOutOfBoundsHandling, std::array{ pCS }))
        , m_NumSRVSpacesUsed{ pCS ? pCS->m_NumSRVSpacesUsed : 1u }
    {
    }
    RootSignatureDesc(RootSignatureDesc const&) = default;
    // NOTE(review): m_Flags is a const member, so this defaulted copy assignment is defined as deleted.
    RootSignatureDesc& operator=(RootSignatureDesc const&) = default;

    UINT GetUAVBindingCount() const { return NonCBBucketToBindingCount(m_UAVBucket); }

    // Reinterprets the start of this object as a 64-bit value (used as a key by operator== and std::hash).
    UINT64 GetAsUINT64() const { return *reinterpret_cast(this); }
    void GetAsD3D12Desc(VersionedRootSignatureDescWithStorage& Storage, ImmediateContext* pParent) const;

    bool operator==(RootSignatureDesc const& o) const
    {
        if (GetAsUINT64() != o.GetAsUINT64()) // Check non-interface equality
            return false;
        if (!(m_Flags & UsesShaderInterfaces)) // If no interfaces, then equal
            return true;
        // Check additional interface-specific data
        return memcmp(m_NumSRVSpacesUsed, o.m_NumSRVSpacesUsed, sizeof(m_NumSRVSpacesUsed)) == 0;
    }

    // Returns the stage entry for shader stage `s`; the compute stage shares element 0.
    template
    ShaderStage const& GetShaderStage() const
    {
        static_assert(s < ShaderStageCount);
        if constexpr (s == e_CS)
        {
            return m_ShaderStages[0];
        }
        else
        {
            return m_ShaderStages[(UINT)s];
        }
    }

private:
    // Largest UAV declaration count among the non-null graphics shaders (0 if there are none).
    static UINT NumUAVBindings(SShaderDecls const* pVS, SShaderDecls const* pPS, SShaderDecls const* pGS, SShaderDecls const* pHS, SShaderDecls const* pDS)
    {
        UINT MaxUAVCount = 0;
        SShaderDecls const* arr[] = { pVS, pPS, pGS, pHS, pDS };
        for (auto p : arr)
        {
            if (p && p->m_UAVDecls.size() > MaxUAVCount)
            {
                MaxUAVCount = (UINT)p->m_UAVDecls.size();
            }
        }
        return MaxUAVCount;
    }
};
// Everything before m_NumSRVSpacesUsed must occupy exactly sizeof(UINT64) bytes for GetAsUINT64().
static_assert(offsetof(RootSignatureDesc, m_NumSRVSpacesUsed) == sizeof(UINT64), "Using as key");

// Common base for root signature objects; exposes creation from a versioned desc or a blob to derived classes.
class RootSignatureBase : protected DeviceChildImpl
{
public:
    RootSignatureBase(ImmediateContext* pParent)
        : DeviceChildImpl(pParent)
    {
    }

protected:
    void Create(D3D12_VERSIONED_ROOT_SIGNATURE_DESC const& rootDesc) noexcept(false);
    void Create(const void* pBlob, SIZE_T BlobSize) noexcept(false);
};

// Root signature whose Create() overloads are made public for direct use.
class InternalRootSignature : public RootSignatureBase
{
public:
    InternalRootSignature(ImmediateContext* pParent)
        : RootSignatureBase(pParent)
    {
    }

    using RootSignatureBase::Create;
    using DeviceChildImpl::Created;
    // Returns the underlying object via GetForUse() for the graphics command list type.
    ID3D12RootSignature* GetRootSignature() { return GetForUse(COMMAND_LIST_TYPE::GRAPHICS); }
};

// Root signature created from a RootSignatureDesc; keeps a copy of the description in m_Desc.
class RootSignature : public RootSignatureBase
{
public:
    RootSignature(ImmediateContext* pParent, RootSignatureDesc const& desc)
        : RootSignatureBase(pParent)
        , m_Desc(desc)
    {
        // Expand the compact description into a D3D12 desc (Storage holds the arrays) and create from it.
        VersionedRootSignatureDescWithStorage Storage;
        m_Desc.GetAsD3D12Desc(Storage, pParent);
        Create(Storage.RootDesc);
    }
    using DeviceChildImpl::GetForImmediateUse;
    const RootSignatureDesc m_Desc;
};
};

// Hash for RootSignatureDesc: hashes the 64-bit key, and mixes in the SRV space counts only when
// the UsesShaderInterfaces flag is set (mirroring operator==).
template<> class std::hash
{
public:
    size_t operator()(D3D12TranslationLayer::RootSignatureDesc const& d) const
    {
        size_t seed = std::hash()(d.GetAsUINT64());
        if (d.m_Flags & D3D12TranslationLayer::RootSignatureDesc::UsesShaderInterfaces)
        {
            for (auto SRVSpaces : d.m_NumSRVSpacesUsed)
            {
                D3D12TranslationLayer::hash_combine(seed, SRVSpaces);
            }
        }
        return seed;
    }
};

================================================
FILE: include/Sampler.hpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once

namespace D3D12TranslationLayer
{
//==================================================================================================================================
// Sampler
// Stores data responsible for remapping D3D11 samplers to underlying D3D12 samplers
//==================================================================================================================================
class Sampler : public DeviceChild
{
public:
    Sampler(ImmediateContext* pDevice, D3D12_SAMPLER_DESC const& desc) noexcept(false);
    ~Sampler() noexcept;

public:
    D3D12_CPU_DESCRIPTOR_HANDLE m_Descriptor;
    UINT m_DescriptorHeapIndex;
};
};

================================================
FILE: include/Sampler.inl
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once namespace D3D12TranslationLayer { //---------------------------------------------------------------------------------------------------------------------------------- inline Sampler::Sampler(ImmediateContext* pDevice, D3D12_SAMPLER_DESC const& desc) noexcept(false) : DeviceChild(pDevice) { if (!pDevice->ComputeOnly()) { m_Descriptor = pDevice->m_SamplerAllocator.AllocateHeapSlot(&m_DescriptorHeapIndex); // throw( _com_error ) pDevice->m_pDevice12->CreateSampler(&desc, m_Descriptor); } } //---------------------------------------------------------------------------------------------------------------------------------- inline Sampler::~Sampler() noexcept { if (!m_pParent->ComputeOnly()) { m_pParent->m_SamplerAllocator.FreeHeapSlot(m_Descriptor, m_DescriptorHeapIndex); } } }; ================================================ FILE: include/Shader.hpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once namespace D3D12TranslationLayer { enum class RESOURCE_DIMENSION { UNKNOWN = 0, BUFFER = 1, TEXTURE1D = 2, TEXTURE2D = 3, TEXTURE2DMS = 4, TEXTURE3D = 5, TEXTURECUBE = 6, TEXTURE1DARRAY = 7, TEXTURE2DARRAY = 8, TEXTURE2DMSARRAY = 9, TEXTURECUBEARRAY = 10, RAW_BUFFER = 11, STRUCTURED_BUFFER = 12 }; typedef std::vector TDeclVector; struct SShaderDecls { TDeclVector m_ResourceDecls; TDeclVector m_UAVDecls; UINT m_NumSamplers = 0; UINT m_NumCBs = 0; UINT m_OutputStreamMask = 0; bool m_bUsesInterfaces = false; UINT m_NumSRVSpacesUsed = 1; void Parse(UINT const* pDriverBytecode); }; class Shader : public DeviceChild, public SShaderDecls { public: #ifdef SUPPORTS_DXBC_PARSE // Construct with ownership of DXBC, and parse decls Shader(ImmediateContext* pParent, std::unique_ptr byteCode, SIZE_T bytecodeSize); // Construct without ownership of DXBC, and parse decls Shader(ImmediateContext* pParent, const void* byteCode, SIZE_T bytecodeSize); #endif // Construct with ownership of DXBC and 
DXIL, with pre-parsed decls Shader(ImmediateContext* pParent, std::unique_ptr DXBC, CComHeapPtr& DXIL, SIZE_T dxilSize, SShaderDecls PrecomputedDecls); // Construct without ownership, shader model does not matter, with pre-parsed decls Shader(ImmediateContext* pParent, const void* byteCode, SIZE_T bytecodeSize, SShaderDecls PrecomputedDecls); UINT OutputStreamMask() { return m_OutputStreamMask; } const D3D12_SHADER_BYTECODE& GetByteCode() const{ return m_Desc; } private: #ifdef SUPPORTS_DXBC_PARSE void Init(); #endif std::unique_ptr const m_ByteCode; CComHeapPtr const m_Dxil; D3D12_SHADER_BYTECODE const m_Desc; }; }; ================================================ FILE: include/Shader.inl ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once namespace D3D12TranslationLayer { template struct SShaderTraits; // Causes intellisense errors #undef DOMAIN #define SHADER_TRAITS_TRANSLATION_LAYER( initial, name, nameLower ) \ template<> \ struct SShaderTraits \ { \ static const EDirtyBits c_ShaderResourcesDirty = e_##initial##SShaderResourcesDirty; \ static const EDirtyBits c_SamplersDirty = e_##initial##SSamplersDirty; \ static const EDirtyBits c_ConstantBuffersDirty = e_##initial##SConstantBuffersDirty; \ static ImmediateContext::SStageState& CurrentStageState(ImmediateContext::SState& CurrentState) { return CurrentState.m_##initial##S; } \ } SHADER_TRAITS_TRANSLATION_LAYER(V, VERTEX, Vertex); SHADER_TRAITS_TRANSLATION_LAYER(P, PIXEL, Pixel); SHADER_TRAITS_TRANSLATION_LAYER(G, GEOMETRY, Geometry); SHADER_TRAITS_TRANSLATION_LAYER(D, DOMAIN, Domain); SHADER_TRAITS_TRANSLATION_LAYER(H, HULL, Hull); SHADER_TRAITS_TRANSLATION_LAYER(C, COMPUTE, Compute); //---------------------------------------------------------------------------------------------------------------------------------- template void TRANSLATION_API ImmediateContext::SetShaderResources(UINT StartSlot, __in_range(0, 
D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT) UINT NumSRVs, SRV* const* ppSRVs) { typedef SShaderTraits TShaderTraits; ImmediateContext::SStageState& CurrentStageState = TShaderTraits::CurrentStageState(m_CurrentState); for (UINT i = 0; i < NumSRVs; ++i) { UINT slot = i + StartSlot; auto pSRV = ppSRVs[i]; CurrentStageState.m_SRVs.UpdateBinding(slot, pSRV, eShader); } } //---------------------------------------------------------------------------------------------------------------------------------- template void TRANSLATION_API ImmediateContext::SetSamplers(UINT StartSlot, __in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT) UINT NumSamplers, Sampler* const* ppSamplers) { typedef SShaderTraits TShaderTraits; ImmediateContext::SStageState& CurrentStageState = TShaderTraits::CurrentStageState(m_CurrentState); for (UINT i = 0; i < NumSamplers; ++i) { UINT slot = i + StartSlot; auto pSampler = ppSamplers[i]; CurrentStageState.m_Samplers.UpdateBinding(slot, pSampler); } } //---------------------------------------------------------------------------------------------------------------------------------- template void TRANSLATION_API ImmediateContext::SetConstantBuffers(UINT StartSlot, __in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_HW_SLOT_COUNT) UINT NumBuffers, Resource* const* ppCBs, __in_ecount_opt(NumBuffers) CONST UINT* pFirstConstant, __in_ecount_opt(NumBuffers) CONST UINT* pNumConstants) { typedef SShaderTraits TShaderTraits; ImmediateContext::SStageState& CurrentStageState = TShaderTraits::CurrentStageState(m_CurrentState); for (UINT i = 0; i < NumBuffers; ++i) { UINT slot = i + StartSlot; Resource* pCB = ppCBs[i]; CurrentStageState.m_CBs.UpdateBinding(slot, pCB, eShader); UINT prevFirstConstant = CurrentStageState.m_uConstantBufferOffsets[slot]; UINT prevNumConstants = CurrentStageState.m_uConstantBufferCounts[slot]; UINT newFirstConstant = pFirstConstant ? pFirstConstant[i] : 0; UINT newNumConstants = pNumConstants ? 
pNumConstants[i] : D3D10_REQ_CONSTANT_BUFFER_ELEMENT_COUNT; if (prevFirstConstant != newFirstConstant || prevNumConstants != newNumConstants) { CurrentStageState.m_CBs.SetDirtyBit(slot); } CurrentStageState.m_uConstantBufferOffsets[slot] = newFirstConstant; CurrentStageState.m_uConstantBufferCounts[slot] = newNumConstants; } } }; ================================================ FILE: include/ShaderBinary.h ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #ifndef _SHADERBINARY_H #define _SHADERBINARY_H #include typedef UINT CShaderToken; /*==========================================================================; * * D3D10ShaderBinary namespace * * File: ShaderBinary.h * Content: Vertex shader assembler support * ***************************************************************************/ namespace D3D10ShaderBinary { const UINT MAX_INSTRUCTION_LENGTH = 128; const UINT D3D10_SB_MAX_INSTRUCTION_OPERANDS = 8; const UINT D3D11_SB_MAX_CALL_OPERANDS = 0x10000; const UINT D3D11_SB_MAX_NUM_TYPES = 0x10000; typedef enum D3D10_SB_OPCODE_CLASS { D3D10_SB_FLOAT_OP, D3D10_SB_INT_OP, D3D10_SB_UINT_OP, D3D10_SB_BIT_OP, D3D10_SB_FLOW_OP, D3D10_SB_TEX_OP, D3D10_SB_DCL_OP, D3D11_SB_ATOMIC_OP, D3D11_SB_MEM_OP, D3D11_SB_DOUBLE_OP, D3D11_SB_FLOAT_TO_DOUBLE_OP, D3D11_SB_DOUBLE_TO_FLOAT_OP, D3D11_SB_DEBUG_OP, } D3D10_SB_OPCODE_CLASS; struct CInstructionInfo { void Set (BYTE NumOperands, LPCSTR Name, D3D10_SB_OPCODE_CLASS OpClass, BYTE InPrecisionFromOutMask) { m_NumOperands = NumOperands; m_InPrecisionFromOutMask = InPrecisionFromOutMask; #if !MODERN_FLAG StringCchCopyA(m_Name, sizeof(m_Name), Name); #endif m_OpClass = OpClass; } char m_Name[64]; BYTE m_NumOperands; BYTE m_InPrecisionFromOutMask; D3D10_SB_OPCODE_CLASS m_OpClass; }; extern CInstructionInfo g_InstructionInfo[D3D10_SB_NUM_OPCODES]; UINT GetNumInstructionOperands(D3D10_SB_OPCODE_TYPE OpCode); void InitInstructionInfo(); 
//***************************************************************************** // // class COperandIndex // // Represents a dimension index of an operand // //***************************************************************************** class COperandIndex { public: //COperandIndex() {} // Value for the immediate index type union { UINT m_RegIndex; UINT m_RegIndexA[2]; INT64 m_RegIndex64; }; // Data for the relative index type D3D10_SB_OPERAND_TYPE m_RelRegType; D3D10_SB_4_COMPONENT_NAME m_ComponentName; D3D10_SB_OPERAND_INDEX_DIMENSION m_IndexDimension; BOOL m_bExtendedOperand; D3D11_SB_OPERAND_MIN_PRECISION m_MinPrecision; D3D10_SB_EXTENDED_OPERAND_TYPE m_ExtendedOperandType; // First index of the relative register union { UINT m_RelIndex; UINT m_RelIndexA[2]; INT64 m_RelIndex64; }; // Second index of the relative register union { UINT m_RelIndex1; UINT m_RelIndexA1[2]; INT64 m_RelIndex641; }; }; enum MinPrecQuantizeFunctionIndex // Used by reference rasterizer (IHVs can ignore) { MinPrecFuncDefault = 0, MinPrecFunc2_8, MinPrecFunc16, MinPrecFuncUint16, MinPrecFuncInt16, }; //***************************************************************************** // // class COperandBase // // A base class for shader instruction operands // //***************************************************************************** class COperandBase { public: COperandBase() {Clear();} COperandBase(const COperandBase & Op) { memcpy(this, &Op, sizeof(*this)); } D3D10_SB_OPERAND_TYPE OperandType() const {return m_Type;} const COperandIndex* OperandIndex(UINT Index) const {return &m_Index[Index];} D3D10_SB_OPERAND_INDEX_REPRESENTATION OperandIndexType(UINT Index) const {return m_IndexType[Index];} D3D10_SB_OPERAND_INDEX_DIMENSION OperandIndexDimension() const {return m_IndexDimension;} D3D10_SB_OPERAND_NUM_COMPONENTS NumComponents() const {return m_NumComponents;} // Get the register index for a given dimension UINT RegIndex(UINT Dimension = 0) const {return 
m_Index[Dimension].m_RegIndex;} // Get the register index from the lowest dimension UINT RegIndexForMinorDimension() const { switch (m_IndexDimension) { default: case D3D10_SB_OPERAND_INDEX_1D: return RegIndex(0); case D3D10_SB_OPERAND_INDEX_2D: return RegIndex(1); case D3D10_SB_OPERAND_INDEX_3D: return RegIndex(2); } } // Get the write mask UINT WriteMask() const {return m_WriteMask;} // Get the swizzle UINT SwizzleComponent(UINT index) const {return m_Swizzle[index];} // Get immediate 32 bit value UINT Imm32() const {return m_Value[0];} void SetModifier(D3D10_SB_OPERAND_MODIFIER Modifier) { m_Modifier = Modifier; if (Modifier != D3D10_SB_OPERAND_MODIFIER_NONE) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; } } void SetMinPrecision(D3D11_SB_OPERAND_MIN_PRECISION MinPrec) { m_MinPrecision = MinPrec; if( m_MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // reusing same extended operand token as modifiers. 
} } D3D10_SB_OPERAND_MODIFIER Modifier() const {return m_Modifier;} public: //esp in the unions...it's just redundant to not directly access things void Clear() { memset(this, 0, sizeof(*this)); } MinPrecQuantizeFunctionIndex m_MinPrecQuantizeFunctionIndex; // used by ref for low precision (IHVs can ignore) D3D10_SB_OPERAND_TYPE m_Type; COperandIndex m_Index[3]; D3D10_SB_OPERAND_NUM_COMPONENTS m_NumComponents; D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE m_ComponentSelection; BOOL m_bExtendedOperand; D3D10_SB_OPERAND_MODIFIER m_Modifier; D3D11_SB_OPERAND_MIN_PRECISION m_MinPrecision; D3D10_SB_EXTENDED_OPERAND_TYPE m_ExtendedOperandType; union { UINT m_WriteMask; BYTE m_Swizzle[4]; }; D3D10_SB_4_COMPONENT_NAME m_ComponentName; union { UINT m_Value[4]; float m_Valuef[4]; INT64 m_Value64[2]; double m_Valued[2]; }; struct { D3D10_SB_OPERAND_INDEX_REPRESENTATION m_IndexType[3]; D3D10_SB_OPERAND_INDEX_DIMENSION m_IndexDimension; }; friend class CShaderAsm; friend class CShaderCodeParser; friend class CInstruction; friend class COperand; friend class COperandDst; }; //***************************************************************************** // // class COperand // // Encapsulates a source operand in shader instructions // //***************************************************************************** class COperand: public COperandBase { public: COperand(): COperandBase() {} COperand(UINT Imm32): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_WriteMask = 0; m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE32; m_bExtendedOperand = FALSE; m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D; m_Value[0] = Imm32; m_NumComponents = D3D10_SB_OPERAND_1_COMPONENT; } COperand(int Imm32): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_WriteMask = 0; m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE32; m_bExtendedOperand = FALSE; m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D; m_Value[0] = Imm32; m_NumComponents = D3D10_SB_OPERAND_1_COMPONENT; } COperand(float Imm32): 
COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_WriteMask = 0; m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE32; m_bExtendedOperand = FALSE; m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D; m_Valuef[0] = Imm32; m_NumComponents = D3D10_SB_OPERAND_1_COMPONENT; } COperand(INT64 Imm64): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_WriteMask = 0; m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE64; m_bExtendedOperand = FALSE; m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D; m_Value64[0] = Imm64; m_NumComponents = D3D10_SB_OPERAND_1_COMPONENT; } COperand(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) : COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_Type = Type; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_0_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[0].m_RegIndex = RegIndex; } // Immediate constant COperand(float v1, float v2, float v3, float v4): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = D3D10_SB_4_COMPONENT_X; m_Swizzle[1] = D3D10_SB_4_COMPONENT_Y; m_Swizzle[2] = D3D10_SB_4_COMPONENT_Z; m_Swizzle[3] = D3D10_SB_4_COMPONENT_W; m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE32; m_bExtendedOperand = FALSE; m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D; m_Valuef[0] = v1; m_Valuef[1] = v2; m_Valuef[2] = v3; m_Valuef[3] = v4; } // Immediate constant COperand(double v1, double v2): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = 
D3D10_SB_4_COMPONENT_X; m_Swizzle[1] = D3D10_SB_4_COMPONENT_Y; m_Swizzle[2] = D3D10_SB_4_COMPONENT_Z; m_Swizzle[3] = D3D10_SB_4_COMPONENT_W; m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE64; m_bExtendedOperand = FALSE; m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D; m_Valued[0] = v1; m_Valued[1] = v2; } // Immediate constant COperand(float v1, float v2, float v3, float v4, BYTE SwizzleX, BYTE SwizzleY, BYTE SwizzleZ, BYTE SwizzleW): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = SwizzleX; m_Swizzle[1] = SwizzleY; m_Swizzle[2] = SwizzleZ; m_Swizzle[3] = SwizzleW; m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE32; m_bExtendedOperand = FALSE; m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D; m_Valuef[0] = v1; m_Valuef[1] = v2; m_Valuef[2] = v3; m_Valuef[3] = v4; } // Immediate constant COperand(int v1, int v2, int v3, int v4): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = D3D10_SB_4_COMPONENT_X; m_Swizzle[1] = D3D10_SB_4_COMPONENT_Y; m_Swizzle[2] = D3D10_SB_4_COMPONENT_Z; m_Swizzle[3] = D3D10_SB_4_COMPONENT_W; m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE32; m_bExtendedOperand = FALSE; m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D; m_Value[0] = v1; m_Value[1] = v2; m_Value[2] = v3; m_Value[3] = v4; } // Immediate constant COperand(int v1, int v2, int v3, int v4, BYTE SwizzleX, BYTE SwizzleY, BYTE SwizzleZ, BYTE SwizzleW): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = SwizzleX; m_Swizzle[1] = SwizzleY; m_Swizzle[2] = SwizzleZ; m_Swizzle[3] = SwizzleW; m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE32; m_bExtendedOperand = FALSE; m_NumComponents = 
D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D; m_Value[0] = v1; m_Value[1] = v2; m_Value[2] = v3; m_Value[3] = v4; } COperand(INT64 v1, INT64 v2): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = D3D10_SB_4_COMPONENT_X; m_Swizzle[1] = D3D10_SB_4_COMPONENT_Y; m_Swizzle[2] = D3D10_SB_4_COMPONENT_Z; m_Swizzle[3] = D3D10_SB_4_COMPONENT_W; m_Type = D3D10_SB_OPERAND_TYPE_IMMEDIATE64; m_bExtendedOperand = FALSE; m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D; m_Value64[0] = v1; m_Value64[1] = v2; } COperand(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, BYTE SwizzleX, BYTE SwizzleY, BYTE SwizzleZ, BYTE SwizzleW, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = SwizzleX; m_Swizzle[1] = SwizzleY; m_Swizzle[2] = SwizzleZ; m_Swizzle[3] = SwizzleW; m_Type = Type; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[0].m_RegIndex = RegIndex; } // Used for operands without indices COperand(D3D10_SB_OPERAND_TYPE Type, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase() { m_Type = Type; m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; 
// piggybacking on modifier token for minprecision } m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D; if( (Type == D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID) || (Type == D3D11_SB_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) || (Type == D3D11_SB_OPERAND_TYPE_INPUT_COVERAGE_MASK) || (Type == D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED) || (Type == D3D11_SB_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) ) { m_NumComponents = D3D10_SB_OPERAND_1_COMPONENT; } else if( (Type == D3D11_SB_OPERAND_TYPE_INPUT_DOMAIN_POINT) || (Type == D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID) || (Type == D3D11_SB_OPERAND_TYPE_INPUT_THREAD_GROUP_ID) || (Type == D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP) || (Type == D3D11_SB_OPERAND_TYPE_CYCLE_COUNTER) ) { m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; } else { m_NumComponents = D3D10_SB_OPERAND_0_COMPONENT; } } // source operand with relative addressing COperand(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, D3D10_SB_OPERAND_TYPE RelRegType, UINT RelRegIndex, D3D10_SB_4_COMPONENT_NAME RelComponentName, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT, D3D11_SB_OPERAND_MIN_PRECISION RelRegMinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_Type = Type; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_0_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; if (RegIndex == 0) m_IndexType[0] = D3D10_SB_OPERAND_INDEX_RELATIVE; else m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; m_Index[0].m_RegIndex = RegIndex; m_Index[0].m_RelRegType = RelRegType; if( RelRegType == D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP ) { m_Index[0].m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; } else { 
m_Index[0].m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; } m_Index[0].m_RelIndex = RelRegIndex; m_Index[0].m_RelIndex1 = 0xFFFFFFFF; m_Index[0].m_ComponentName = RelComponentName; m_Index[0].m_MinPrecision = RelRegMinPrecision; if( RelRegMinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_Index[0].m_bExtendedOperand = true; m_Index[0].m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } else { m_Index[0].m_bExtendedOperand = false; } } friend class CShaderAsm; friend class CShaderCodeParser; friend class CInstruction; }; //***************************************************************************** // // class COperand4 // // Encapsulates a source operand with 4 components in shader instructions // //***************************************************************************** class COperand4: public COperandBase { public: COperand4(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = D3D10_SB_4_COMPONENT_X; m_Swizzle[1] = D3D10_SB_4_COMPONENT_Y; m_Swizzle[2] = D3D10_SB_4_COMPONENT_Z; m_Swizzle[3] = D3D10_SB_4_COMPONENT_W; m_Type = Type; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[0].m_RegIndex = RegIndex; } COperand4(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, D3D10_SB_4_COMPONENT_NAME Component, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase() { m_Modifier = 
D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE; m_ComponentName = Component; m_Type = Type; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[0].m_RegIndex = RegIndex; } // single component select on reg, 1D indexing on address COperand4(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, D3D10_SB_4_COMPONENT_NAME Component, D3D10_SB_OPERAND_TYPE RelRegType, UINT RelRegIndex, D3D10_SB_4_COMPONENT_NAME RelComponentName, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT, D3D11_SB_OPERAND_MIN_PRECISION RelRegMinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE; m_ComponentName = Component; m_Type = Type; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; if (RegIndex == 0) m_IndexType[0] = D3D10_SB_OPERAND_INDEX_RELATIVE; else m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; m_Index[0].m_RegIndex = RegIndex; m_Index[0].m_RelRegType = RelRegType; m_Index[0].m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; m_Index[0].m_RelIndex = RelRegIndex; m_Index[0].m_RelIndex1 = 0xFFFFFFFF; m_Index[0].m_ComponentName = RelComponentName; m_Index[0].m_MinPrecision = RelRegMinPrecision; if( 
RelRegMinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_Index[0].m_bExtendedOperand = true; m_Index[0].m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } else { m_Index[0].m_bExtendedOperand = false; } } // 4-component source operand with relative addressing COperand4(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, D3D10_SB_OPERAND_TYPE RelRegType, UINT RelRegIndex, D3D10_SB_4_COMPONENT_NAME RelComponentName, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT, D3D11_SB_OPERAND_MIN_PRECISION RelRegMinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = D3D10_SB_4_COMPONENT_X; m_Swizzle[1] = D3D10_SB_4_COMPONENT_Y; m_Swizzle[2] = D3D10_SB_4_COMPONENT_Z; m_Swizzle[3] = D3D10_SB_4_COMPONENT_W; m_Type = Type; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; if (RegIndex == 0) m_IndexType[0] = D3D10_SB_OPERAND_INDEX_RELATIVE; else m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; m_Index[0].m_RegIndex = RegIndex; m_Index[0].m_RelRegType = RelRegType; if( RelRegType == D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP ) { m_Index[0].m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; } else { m_Index[0].m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; } m_Index[0].m_RelIndex = RelRegIndex; m_Index[0].m_RelIndex1 = 0xFFFFFFFF; m_Index[0].m_ComponentName = RelComponentName; m_Index[0].m_ComponentName = RelComponentName; m_Index[0].m_MinPrecision = RelRegMinPrecision; if( RelRegMinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { 
m_Index[0].m_bExtendedOperand = true; m_Index[0].m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } else { m_Index[0].m_bExtendedOperand = false; } } // 4-component source operand with relative addressing COperand4(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, D3D10_SB_OPERAND_TYPE RelRegType, UINT RelRegIndex, UINT RelRegIndex1, D3D10_SB_4_COMPONENT_NAME RelComponentName, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT, D3D11_SB_OPERAND_MIN_PRECISION RelRegMinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = D3D10_SB_4_COMPONENT_X; m_Swizzle[1] = D3D10_SB_4_COMPONENT_Y; m_Swizzle[2] = D3D10_SB_4_COMPONENT_Z; m_Swizzle[3] = D3D10_SB_4_COMPONENT_W; m_Type = Type; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; if (RegIndex == 0) m_IndexType[0] = D3D10_SB_OPERAND_INDEX_RELATIVE; else m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; m_Index[0].m_RegIndex = RegIndex; m_Index[0].m_RelRegType = RelRegType; if( RelRegType == D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP ) { m_Index[0].m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; } else { m_Index[0].m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; } m_Index[0].m_RelIndex = RelRegIndex; m_Index[0].m_RelIndex1 = RelRegIndex1; m_Index[0].m_ComponentName = RelComponentName; m_Index[0].m_MinPrecision = RelRegMinPrecision; if( RelRegMinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_Index[0].m_bExtendedOperand = true; m_Index[0].m_ExtendedOperandType = 
D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } else { m_Index[0].m_bExtendedOperand = false; } } COperand4(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, BYTE SwizzleX, BYTE SwizzleY, BYTE SwizzleZ, BYTE SwizzleW, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = SwizzleX; m_Swizzle[1] = SwizzleY; m_Swizzle[2] = SwizzleZ; m_Swizzle[3] = SwizzleW; m_Type = Type; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[0].m_RegIndex = RegIndex; } // 4-component source operand with relative addressing COperand4(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, BYTE SwizzleX, BYTE SwizzleY, BYTE SwizzleZ, BYTE SwizzleW, D3D10_SB_OPERAND_TYPE RelRegType, UINT RelRegIndex, UINT RelRegIndex1, D3D10_SB_4_COMPONENT_NAME RelComponentName, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT, D3D11_SB_OPERAND_MIN_PRECISION RelRegMinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = SwizzleX; m_Swizzle[1] = SwizzleY; m_Swizzle[2] = SwizzleZ; m_Swizzle[3] = SwizzleW; m_Type = Type; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } 
m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; if (RegIndex == 0) m_IndexType[0] = D3D10_SB_OPERAND_INDEX_RELATIVE; else m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; m_Index[0].m_RelRegType = RelRegType; if( RelRegType == D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP ) { m_Index[0].m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; } else { m_Index[0].m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; } m_Index[0].m_RegIndex = RegIndex; m_Index[0].m_RelIndex = RelRegIndex; m_Index[0].m_RelIndex1 = RelRegIndex1; m_Index[0].m_ComponentName = RelComponentName; m_Index[0].m_MinPrecision = RelRegMinPrecision; if( RelRegMinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_Index[0].m_bExtendedOperand = true; m_Index[0].m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } else { m_Index[0].m_bExtendedOperand = false; } } friend class CShaderAsm; friend class CShaderCodeParser; friend class CInstruction; }; //***************************************************************************** // // class COperandDst // // Encapsulates a destination operand in shader instructions // //***************************************************************************** class COperandDst: public COperandBase { public: COperandDst(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE; m_WriteMask = D3D10_SB_OPERAND_4_COMPONENT_MASK_ALL; m_Type = Type; m_MinPrecision = MinPrecision; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = 
D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[0].m_RegIndex = RegIndex; } COperandDst(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, UINT WriteMask, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE; m_WriteMask = WriteMask; m_Type = Type; m_MinPrecision = MinPrecision; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[0].m_RegIndex = RegIndex; } COperandDst(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, UINT WriteMask, D3D10_SB_OPERAND_TYPE RelRegType, UINT RelRegIndex, UINT RelRegIndex1, D3D10_SB_4_COMPONENT_NAME RelComponentName, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT, D3D11_SB_OPERAND_MIN_PRECISION RelRegMinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) :COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE; m_WriteMask = WriteMask; m_Type = Type; m_MinPrecision = MinPrecision; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; if (RegIndex == 0) m_IndexType[0] = D3D10_SB_OPERAND_INDEX_RELATIVE; else m_IndexType[0] = 
D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; m_Index[0].m_RelRegType = RelRegType; if( RelRegType == D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP ) { m_Index[0].m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; } else { m_Index[0].m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; } m_Index[0].m_RegIndex = RegIndex; m_Index[0].m_RelIndex = RelRegIndex; m_Index[0].m_RelIndex1 = RelRegIndex1; m_Index[0].m_ComponentName = RelComponentName; m_Index[0].m_MinPrecision = RelRegMinPrecision; if( RelRegMinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_Index[0].m_bExtendedOperand = true; m_Index[0].m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } else { m_Index[0].m_bExtendedOperand = false; } } COperandDst(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex, UINT WriteMask, D3D10_SB_OPERAND_TYPE RelRegType, UINT RelRegIndex, UINT RelRegIndex1, D3D10_SB_4_COMPONENT_NAME RelComponentName, UINT, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT, D3D11_SB_OPERAND_MIN_PRECISION RelReg1MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) : COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE; m_WriteMask = WriteMask; m_Type = Type; m_MinPrecision = MinPrecision; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[0].m_RegIndex = RegIndex; if (RelRegIndex == 0) m_IndexType[1] = D3D10_SB_OPERAND_INDEX_RELATIVE; else m_IndexType[1] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; m_Index[1].m_RelRegType = RelRegType; if( RelRegType == 
D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP ) { m_Index[1].m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; } else { m_Index[1].m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; } m_Index[1].m_RegIndex = RelRegIndex; m_Index[1].m_RelIndex = RelRegIndex1; m_Index[1].m_RelIndex1 = 0; m_Index[1].m_ComponentName = RelComponentName; m_Index[1].m_MinPrecision = RelReg1MinPrecision; if( RelReg1MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_Index[1].m_bExtendedOperand = true; m_Index[1].m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } else { m_Index[1].m_bExtendedOperand = false; } } // 2D dst (e.g. for GS input decl) COperandDst(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex0, UINT RegIndex1,UINT WriteMask, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE; m_WriteMask = WriteMask; m_Type = Type; m_MinPrecision = MinPrecision; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[0].m_RegIndex = RegIndex0; m_IndexType[1] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[1].m_RegIndex = RegIndex1; } // Used for operands without indices COperandDst(D3D10_SB_OPERAND_TYPE Type, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase() { switch( Type ) { case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH: case D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: case D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: m_NumComponents = D3D10_SB_OPERAND_1_COMPONENT; break; default: 
m_NumComponents = D3D10_SB_OPERAND_0_COMPONENT; break; } m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D; m_Type = Type; m_MinPrecision = MinPrecision; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } } COperandDst(UINT WriteMask, D3D10_SB_OPERAND_TYPE Type, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) : COperandBase() // param order disambiguates from another constructor. { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE; m_WriteMask = WriteMask; m_Type = Type; m_MinPrecision = MinPrecision; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_0D; } friend class CShaderAsm; friend class CShaderCodeParser; friend class CInstruction; }; //***************************************************************************** // // class COperand2D // // Encapsulates 2 dimensional source operand with 4 components in shader instructions // //***************************************************************************** class COperand2D: public COperandBase { public: COperand2D(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex0, UINT RegIndex1, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) : COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = D3D10_SB_4_COMPONENT_X; m_Swizzle[1] = D3D10_SB_4_COMPONENT_Y; m_Swizzle[2] 
= D3D10_SB_4_COMPONENT_Z; m_Swizzle[3] = D3D10_SB_4_COMPONENT_W; m_Type = Type; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[0].m_RegIndex = RegIndex0; m_IndexType[1] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[1].m_RegIndex = RegIndex1; } COperand2D(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex0, UINT RegIndex1, D3D10_SB_4_COMPONENT_NAME Component, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) : COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE; m_ComponentName = Component; m_Type = Type; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[0].m_RegIndex = RegIndex0; m_IndexType[1] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[1].m_RegIndex = RegIndex1; } // 2-dimensional 4-component operand with relative addressing the second index // For example: // c2[x12[3].w + 7] // Type = c // RelRegType = x // RegIndex0 = 2 // RegIndex1 = 7 // RelRegIndex = 12 // RelRegIndex1 = 3 // RelComponentName = w // COperand2D(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex0, UINT RegIndex1, D3D10_SB_OPERAND_TYPE RelRegType, UINT RelRegIndex, UINT RelRegIndex1, D3D10_SB_4_COMPONENT_NAME RelComponentName, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = 
D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT, D3D11_SB_OPERAND_MIN_PRECISION RelReg1MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) : COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = D3D10_SB_4_COMPONENT_X; m_Swizzle[1] = D3D10_SB_4_COMPONENT_Y; m_Swizzle[2] = D3D10_SB_4_COMPONENT_Z; m_Swizzle[3] = D3D10_SB_4_COMPONENT_W; m_Type = Type; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[0].m_RegIndex = RegIndex0; m_IndexType[1] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; m_Index[1].m_RegIndex = RegIndex1; m_Index[1].m_RelRegType = RelRegType; if( RelRegType == D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP ) { m_Index[1].m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; } else { m_Index[1].m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; } m_Index[1].m_RelIndex = RelRegIndex; m_Index[1].m_RelIndex1 = RelRegIndex1; m_Index[1].m_ComponentName = RelComponentName; m_Index[1].m_MinPrecision = RelReg1MinPrecision; if( RelReg1MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_Index[1].m_bExtendedOperand = true; m_Index[1].m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } else { m_Index[1].m_bExtendedOperand = false; } } // 2-dimensional 4-component operand with relative addressing a second index // For example: // c2[r12.y + 7] // Type = c // RelRegType = r // RegIndex0 = 2 // RegIndex1 = 7 // RelRegIndex = 12 // RelRegIndex1 = 3 // RelComponentName = y // COperand2D(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex0, UINT RegIndex1, D3D10_SB_OPERAND_TYPE 
RelRegType, UINT RelRegIndex, D3D10_SB_4_COMPONENT_NAME RelComponentName, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT, D3D11_SB_OPERAND_MIN_PRECISION RelReg1MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) : COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = D3D10_SB_4_COMPONENT_X; m_Swizzle[1] = D3D10_SB_4_COMPONENT_Y; m_Swizzle[2] = D3D10_SB_4_COMPONENT_Z; m_Swizzle[3] = D3D10_SB_4_COMPONENT_W; m_Type = Type; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[0].m_RegIndex = RegIndex0; m_IndexType[1] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; m_Index[1].m_RegIndex = RegIndex1; m_Index[1].m_RelRegType = RelRegType; if( RelRegType == D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP ) { m_Index[1].m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; } else { m_Index[1].m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; } m_Index[1].m_RelIndex = RelRegIndex; m_Index[1].m_ComponentName = RelComponentName; m_Index[1].m_MinPrecision = RelReg1MinPrecision; if( RelReg1MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_Index[1].m_bExtendedOperand = true; m_Index[1].m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } else { m_Index[1].m_bExtendedOperand = false; } } // 2-dimensional 4-component operand with relative addressing both operands COperand2D(D3D10_SB_OPERAND_TYPE Type, BOOL bIndexRelative0, BOOL bIndexRelative1, UINT RegIndex0, UINT RegIndex1, D3D10_SB_OPERAND_TYPE RelRegType0, UINT RelRegIndex0, UINT 
RelRegIndex10, D3D10_SB_4_COMPONENT_NAME RelComponentName0, D3D10_SB_OPERAND_TYPE RelRegType1, UINT RelRegIndex1, UINT RelRegIndex11, D3D10_SB_4_COMPONENT_NAME RelComponentName1, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT, D3D11_SB_OPERAND_MIN_PRECISION RelReg0MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT, D3D11_SB_OPERAND_MIN_PRECISION RelReg1MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) : COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = D3D10_SB_4_COMPONENT_X; m_Swizzle[1] = D3D10_SB_4_COMPONENT_Y; m_Swizzle[2] = D3D10_SB_4_COMPONENT_Z; m_Swizzle[3] = D3D10_SB_4_COMPONENT_W; m_Type = Type; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; if (bIndexRelative0) if (RegIndex0 == 0) m_IndexType[0] = D3D10_SB_OPERAND_INDEX_RELATIVE; else m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; else m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[0].m_RegIndex = RegIndex0; m_Index[0].m_RelRegType = RelRegType0; if( RelRegType0 == D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP ) { m_Index[0].m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; } else { m_Index[0].m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; } m_Index[0].m_RelIndex = RelRegIndex0; m_Index[0].m_RelIndex1 = RelRegIndex10; m_Index[0].m_ComponentName = RelComponentName0; m_Index[0].m_MinPrecision = RelReg0MinPrecision; if( RelReg0MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_Index[0].m_bExtendedOperand = true; m_Index[0].m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } 
else { m_Index[0].m_bExtendedOperand = false; } if (bIndexRelative1) if (RegIndex1 == 0) m_IndexType[1] = D3D10_SB_OPERAND_INDEX_RELATIVE; else m_IndexType[1] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; else m_IndexType[1] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[1].m_RegIndex = RegIndex1; m_Index[1].m_RelRegType = RelRegType1; if( RelRegType1 == D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP ) { m_Index[1].m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; } else { m_Index[1].m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; } m_Index[1].m_RelIndex = RelRegIndex1; m_Index[1].m_RelIndex1 = RelRegIndex11; m_Index[1].m_ComponentName = RelComponentName1; m_Index[1].m_MinPrecision = RelReg1MinPrecision; if( RelReg1MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_Index[1].m_bExtendedOperand = true; m_Index[1].m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } else { m_Index[1].m_bExtendedOperand = false; } } COperand2D(D3D10_SB_OPERAND_TYPE Type, UINT RegIndex0, UINT RegIndex1, BYTE SwizzleX, BYTE SwizzleY, BYTE SwizzleZ, BYTE SwizzleW, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT): COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = SwizzleX; m_Swizzle[1] = SwizzleY; m_Swizzle[2] = SwizzleZ; m_Swizzle[3] = SwizzleW; m_Type = Type; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[0].m_RegIndex = RegIndex0; m_IndexType[1] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[1].m_RegIndex = RegIndex1; } // 2-dimensional 
4-component operand with relative addressing and swizzle COperand2D(D3D10_SB_OPERAND_TYPE Type, BYTE SwizzleX, BYTE SwizzleY, BYTE SwizzleZ, BYTE SwizzleW, BOOL bIndexRelative0, BOOL bIndexRelative1, UINT RegIndex0, D3D10_SB_OPERAND_TYPE RelRegType0, UINT RelRegIndex0, UINT RelRegIndex10, D3D10_SB_4_COMPONENT_NAME RelComponentName0, UINT RegIndex1, D3D10_SB_OPERAND_TYPE RelRegType1, UINT RelRegIndex1, UINT RelRegIndex11, D3D10_SB_4_COMPONENT_NAME RelComponentName1, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT, D3D11_SB_OPERAND_MIN_PRECISION RelReg0MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT, D3D11_SB_OPERAND_MIN_PRECISION RelReg1MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) : COperandBase() { m_Modifier = D3D10_SB_OPERAND_MODIFIER_NONE; m_ComponentSelection = D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE; m_Swizzle[0] = SwizzleX; m_Swizzle[1] = SwizzleY; m_Swizzle[2] = SwizzleZ; m_Swizzle[3] = SwizzleW; m_Type = Type; m_bExtendedOperand = FALSE; m_MinPrecision = MinPrecision; if( MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_bExtendedOperand = true; m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } m_NumComponents = D3D10_SB_OPERAND_4_COMPONENT; m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; if (bIndexRelative0) if (RegIndex0 == 0) m_IndexType[0] = D3D10_SB_OPERAND_INDEX_RELATIVE; else m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; else m_IndexType[0] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[0].m_RegIndex = RegIndex0; m_Index[0].m_RelRegType = RelRegType0; if( RelRegType0 == D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP ) { m_Index[0].m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; } else { m_Index[0].m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; } m_Index[0].m_RelIndex = RelRegIndex0; m_Index[0].m_RelIndex1 = RelRegIndex10; m_Index[0].m_ComponentName = RelComponentName0; m_Index[0].m_MinPrecision = RelReg0MinPrecision; 
if( RelReg0MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_Index[0].m_bExtendedOperand = true; m_Index[0].m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } else { m_Index[0].m_bExtendedOperand = false; } if (bIndexRelative1) if (RegIndex1 == 0) m_IndexType[1] = D3D10_SB_OPERAND_INDEX_RELATIVE; else m_IndexType[1] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; else m_IndexType[1] = D3D10_SB_OPERAND_INDEX_IMMEDIATE32; m_Index[1].m_RegIndex = RegIndex1; m_Index[1].m_RelRegType = RelRegType1; if( RelRegType1 == D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP ) { m_Index[1].m_IndexDimension = D3D10_SB_OPERAND_INDEX_2D; } else { m_Index[1].m_IndexDimension = D3D10_SB_OPERAND_INDEX_1D; } m_Index[1].m_RelIndex = RelRegIndex1; m_Index[1].m_RelIndex1 = RelRegIndex11; m_Index[1].m_ComponentName = RelComponentName1; m_Index[1].m_MinPrecision = RelReg1MinPrecision; if( RelReg1MinPrecision != D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT ) { m_Index[1].m_bExtendedOperand = true; m_Index[1].m_ExtendedOperandType = D3D10_SB_EXTENDED_OPERAND_MODIFIER; // piggybacking on modifier token for minprecision } else { m_Index[1].m_bExtendedOperand = false; } } friend class CShaderAsm; friend class CShaderCodeParser; friend class CInstruction; }; //***************************************************************************** // // CInstruction // //***************************************************************************** // Structures for additional per-instruction fields unioned in CInstruction. // These structures don't contain ALL info used by the particular instruction, // only additional info not already in CInstruction. Some instructions don't // need such structures because CInstruction already has the correct data // fields. 
struct CGlobalFlagsDecl { UINT Flags; }; struct CInputSystemInterpretedValueDecl { D3D10_SB_NAME Name; }; struct CInputSystemGeneratedValueDecl { D3D10_SB_NAME Name; }; struct CInputPSDecl { D3D10_SB_INTERPOLATION_MODE InterpolationMode; }; struct CInputPSSystemInterpretedValueDecl { D3D10_SB_NAME Name; D3D10_SB_INTERPOLATION_MODE InterpolationMode; }; struct CInputPSSystemGeneratedValueDecl { D3D10_SB_NAME Name; D3D10_SB_INTERPOLATION_MODE InterpolationMode; }; struct COutputSystemInterpretedValueDecl { D3D10_SB_NAME Name; }; struct COutputSystemGeneratedValueDecl { D3D10_SB_NAME Name; }; struct CIndexRangeDecl { UINT RegCount; }; struct CResourceDecl { D3D10_SB_RESOURCE_DIMENSION Dimension; D3D10_SB_RESOURCE_RETURN_TYPE ReturnType[4]; UINT SampleCount; }; struct CConstantBufferDecl { D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN AccessPattern; }; struct COutputTopologyDecl { D3D10_SB_PRIMITIVE_TOPOLOGY Topology; }; struct CInputPrimitiveDecl { D3D10_SB_PRIMITIVE Primitive; }; struct CGSMaxOutputVertexCountDecl { UINT MaxOutputVertexCount; }; struct CGSInstanceCountDecl { UINT InstanceCount; }; struct CSamplerDecl { D3D10_SB_SAMPLER_MODE SamplerMode; }; struct CStreamDecl { UINT Stream; }; struct CTempsDecl { UINT NumTemps; }; struct CIndexableTempDecl { UINT IndexableTempNumber; UINT NumRegisters; UINT Mask; // .x, .xy, .xzy or .xyzw (D3D10_SB_OPERAND_4_COMPONENT_MASK_* ) }; struct CHSDSInputControlPointCountDecl { UINT InputControlPointCount; }; struct CHSOutputControlPointCountDecl { UINT OutputControlPointCount; }; struct CTessellatorDomainDecl { D3D11_SB_TESSELLATOR_DOMAIN TessellatorDomain; }; struct CTessellatorPartitioningDecl { D3D11_SB_TESSELLATOR_PARTITIONING TessellatorPartitioning; }; struct CTessellatorOutputPrimitiveDecl { D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE TessellatorOutputPrimitive; }; struct CHSMaxTessFactorDecl { float MaxTessFactor; }; struct CHSForkPhaseInstanceCountDecl { UINT InstanceCount; }; struct CHSJoinPhaseInstanceCountDecl { UINT 
InstanceCount; }; struct CShaderMessage { D3D11_SB_SHADER_MESSAGE_ID MessageID; D3D11_SB_SHADER_MESSAGE_FORMAT FormatStyle; PCSTR pFormatString; UINT NumOperands; COperandBase* pOperands; }; struct CCustomData { D3D10_SB_CUSTOMDATA_CLASS Type; UINT DataSizeInBytes; void* pData; union { CShaderMessage ShaderMessage; }; }; struct CFunctionTableDecl { UINT FunctionTableNumber; UINT TableLength; UINT* pFunctionIdentifiers; }; struct CInterfaceDecl { WORD InterfaceNumber; WORD ArrayLength; UINT ExpectedTableSize; UINT TableLength; UINT* pFunctionTableIdentifiers; bool bDynamicallyIndexed; }; struct CFunctionBodyDecl { UINT FunctionBodyNumber; }; struct CInterfaceCall { UINT FunctionIndex; COperandBase* pInterfaceOperand; }; struct CThreadGroupDeclaration { UINT x; UINT y; UINT z; }; struct CTypedUAVDeclaration { D3D10_SB_RESOURCE_DIMENSION Dimension; D3D10_SB_RESOURCE_RETURN_TYPE ReturnType[4]; UINT Flags; }; struct CStructuredUAVDeclaration { UINT ByteStride; UINT Flags; }; struct CRawUAVDeclaration { UINT Flags; }; struct CRawTGSMDeclaration { UINT ByteCount; }; struct CStructuredTGSMDeclaration { UINT StructByteStride; UINT StructCount; }; struct CStructuredSRVDeclaration { UINT ByteStride; }; struct CSyncFlags { bool bThreadsInGroup; bool bThreadGroupSharedMemory; bool bUnorderedAccessViewMemoryGlobal; bool bUnorderedAccessViewMemoryGroup; // exclusive to global }; class CInstruction { protected: static const UINT MAX_PRIVATE_DATA_COUNT = 2; public: CInstruction():m_OpCode(D3D10_SB_OPCODE_ADD) { Clear(); } CInstruction(D3D10_SB_OPCODE_TYPE OpCode) { Clear(); m_OpCode = OpCode; m_NumOperands = 0; m_ExtendedOpCodeCount = 0; } CInstruction(D3D10_SB_OPCODE_TYPE OpCode, COperandBase& Operand0, D3D10_SB_INSTRUCTION_TEST_BOOLEAN Test) { Clear(); m_OpCode = OpCode; m_NumOperands = 1; m_ExtendedOpCodeCount = 0; m_Test = Test; m_Operands[0] = Operand0; } CInstruction(D3D10_SB_OPCODE_TYPE OpCode, COperandBase& Operand0, COperandBase& Operand1) { Clear(); m_OpCode = OpCode; 
m_NumOperands = 2; m_ExtendedOpCodeCount = 0; m_Operands[0] = Operand0; m_Operands[1] = Operand1; } CInstruction(D3D10_SB_OPCODE_TYPE OpCode, COperandBase& Operand0, COperandBase& Operand1, COperandBase& Operand2) { Clear(); m_OpCode = OpCode; m_NumOperands = 3; m_ExtendedOpCodeCount = 0; m_Operands[0] = Operand0; m_Operands[1] = Operand1; m_Operands[2] = Operand2; } CInstruction(D3D10_SB_OPCODE_TYPE OpCode, COperandBase& Operand0, COperandBase& Operand1, COperandBase& Operand2, COperandBase& Operand3) { Clear(); m_OpCode = OpCode; m_NumOperands = 4; m_ExtendedOpCodeCount = 0; m_Operands[0] = Operand0; m_Operands[1] = Operand1; m_Operands[2] = Operand2; m_Operands[3] = Operand3; memset(m_TexelOffset, 0, sizeof(m_TexelOffset)); } void ClearAllocations() { if (m_OpCode == D3D10_SB_OPCODE_CUSTOMDATA) { free(m_CustomData.pData); if (m_CustomData.Type == D3D11_SB_CUSTOMDATA_SHADER_MESSAGE) { free(m_CustomData.ShaderMessage.pOperands); } } else if( m_OpCode == D3D11_SB_OPCODE_DCL_FUNCTION_TABLE ) { free(m_FunctionTableDecl.pFunctionIdentifiers); } else if( m_OpCode == D3D11_SB_OPCODE_DCL_INTERFACE ) { free(m_InterfaceDecl.pFunctionTableIdentifiers); } } void Clear(bool bIncludeCustomData = false) { if( bIncludeCustomData ) // don't need to do this on initial constructor, only if recycling the object. 
{ ClearAllocations(); } memset (this, 0, sizeof(*this)); } ~CInstruction() { ClearAllocations(); } const COperandBase& Operand(UINT Index) const {return m_Operands[Index];} D3D10_SB_OPCODE_TYPE OpCode() const {return m_OpCode;} void SetNumOperands(UINT NumOperands) {m_NumOperands = NumOperands;} UINT NumOperands() const {return m_NumOperands;} void SetTest(D3D10_SB_INSTRUCTION_TEST_BOOLEAN Test) {m_Test = Test;} void SetPreciseMask(UINT PreciseMask) {m_PreciseMask = PreciseMask;} D3D10_SB_INSTRUCTION_TEST_BOOLEAN Test() const {return m_Test;} void SetTexelOffset( const INT8 texelOffset[3] ) { m_OpCodeEx[m_ExtendedOpCodeCount++] = D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS; memcpy(m_TexelOffset, texelOffset,sizeof(m_TexelOffset)); } void SetTexelOffset( INT8 x, INT8 y, INT8 z) { m_OpCodeEx[m_ExtendedOpCodeCount++] = D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS; m_TexelOffset[0] = x; m_TexelOffset[1] = y; m_TexelOffset[2] = z; } void SetResourceDim(D3D10_SB_RESOURCE_DIMENSION Dim, D3D10_SB_RESOURCE_RETURN_TYPE RetType[4], UINT StructureStride) { m_OpCodeEx[m_ExtendedOpCodeCount++] = D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM; m_OpCodeEx[m_ExtendedOpCodeCount++] = D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE; m_ResourceDimEx = Dim; m_ResourceDimStructureStrideEx = StructureStride; memcpy(m_ResourceReturnTypeEx, RetType,4*sizeof(D3D10_SB_RESOURCE_RETURN_TYPE)); } BOOL Disassemble(__out_ecount(StringSize) LPSTR pString, UINT StringSize); // Private data is used by D3D runtime void SetPrivateData(UINT Value, UINT index = 0) { if (index < MAX_PRIVATE_DATA_COUNT) { m_PrivateData[index] = Value; } } UINT PrivateData(UINT index = 0) const { if (index >= MAX_PRIVATE_DATA_COUNT) return 0xFFFFFFFF; return m_PrivateData[index]; } // Get the precise mask UINT GetPreciseMask() const {return m_PreciseMask;} D3D10_SB_OPCODE_TYPE m_OpCode; COperandBase m_Operands[D3D10_SB_MAX_INSTRUCTION_OPERANDS]; UINT m_NumOperands; UINT m_ExtendedOpCodeCount; UINT m_PreciseMask; 
D3D10_SB_EXTENDED_OPCODE_TYPE m_OpCodeEx[D3D11_SB_MAX_SIMULTANEOUS_EXTENDED_OPCODES]; INT8 m_TexelOffset[3]; // for extended opcode only D3D10_SB_RESOURCE_DIMENSION m_ResourceDimEx; // for extended opcode only UINT m_ResourceDimStructureStrideEx; // for extended opcode only D3D10_SB_RESOURCE_RETURN_TYPE m_ResourceReturnTypeEx[4]; // for extended opcode only UINT m_PrivateData[MAX_PRIVATE_DATA_COUNT]; BOOL m_bSaturate; union // extra info needed by some instructions { CInputSystemInterpretedValueDecl m_InputDeclSIV; CInputSystemGeneratedValueDecl m_InputDeclSGV; CInputPSDecl m_InputPSDecl; CInputPSSystemInterpretedValueDecl m_InputPSDeclSIV; CInputPSSystemGeneratedValueDecl m_InputPSDeclSGV; COutputSystemInterpretedValueDecl m_OutputDeclSIV; COutputSystemGeneratedValueDecl m_OutputDeclSGV; CIndexRangeDecl m_IndexRangeDecl; CResourceDecl m_ResourceDecl; CConstantBufferDecl m_ConstantBufferDecl; CInputPrimitiveDecl m_InputPrimitiveDecl; COutputTopologyDecl m_OutputTopologyDecl; CGSMaxOutputVertexCountDecl m_GSMaxOutputVertexCountDecl; CGSInstanceCountDecl m_GSInstanceCountDecl; CSamplerDecl m_SamplerDecl; CStreamDecl m_StreamDecl; CTempsDecl m_TempsDecl; CIndexableTempDecl m_IndexableTempDecl; CGlobalFlagsDecl m_GlobalFlagsDecl; CCustomData m_CustomData; CInterfaceDecl m_InterfaceDecl; CFunctionTableDecl m_FunctionTableDecl; CFunctionBodyDecl m_FunctionBodyDecl; CInterfaceCall m_InterfaceCall; D3D10_SB_INSTRUCTION_TEST_BOOLEAN m_Test; D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE m_ResInfoReturnType; D3D10_SB_INSTRUCTION_RETURN_TYPE m_InstructionReturnType; CHSDSInputControlPointCountDecl m_InputControlPointCountDecl; CHSOutputControlPointCountDecl m_OutputControlPointCountDecl; CTessellatorDomainDecl m_TessellatorDomainDecl; CTessellatorPartitioningDecl m_TessellatorPartitioningDecl; CTessellatorOutputPrimitiveDecl m_TessellatorOutputPrimitiveDecl; CHSMaxTessFactorDecl m_HSMaxTessFactorDecl; CHSForkPhaseInstanceCountDecl m_HSForkPhaseInstanceCountDecl; 
CHSJoinPhaseInstanceCountDecl m_HSJoinPhaseInstanceCountDecl; CThreadGroupDeclaration m_ThreadGroupDecl; CTypedUAVDeclaration m_TypedUAVDecl; CStructuredUAVDeclaration m_StructuredUAVDecl; CRawUAVDeclaration m_RawUAVDecl; CStructuredTGSMDeclaration m_StructuredTGSMDecl; CStructuredSRVDeclaration m_StructuredSRVDecl; CRawTGSMDeclaration m_RawTGSMDecl; CSyncFlags m_SyncFlags; }; }; // **************************************************************************** // // class CShaderAsm // // The class is used to build a binary representation of a shader. // Usage scenario: // 1. Call Init with the initial internal buffer size in UINTs. The // internal buffer will grow if needed // 2. Call StartShader() // 3. Call Emit*() functions to assemble a shader // 4. Call EndShader() // 5. Call GetShader() to get the binary representation // // // **************************************************************************** class CShaderAsm { public: CShaderAsm(): m_dwFunc(NULL), m_Index(0), m_StartOpIndex(0), m_BufferSize(0) { Init(1024); }; ~CShaderAsm() { free(m_dwFunc); }; // Initializes the object with the initial buffer size in UINTs HRESULT Init(UINT BufferSize) { if( BufferSize >= UINT( -1 ) / sizeof( UINT ) ) { return E_OUTOFMEMORY; } m_dwFunc = (UINT*)malloc(BufferSize*sizeof(UINT)); if (m_dwFunc == NULL) { return E_OUTOFMEMORY; } m_BufferSize = BufferSize; Reset(); return S_OK; } UINT* GetShader() {return m_dwFunc;} UINT ShaderSizeInDWORDs() {return m_Index;} UINT ShaderSizeInBytes() {return ShaderSizeInDWORDs() * sizeof(*m_dwFunc);} UINT LastInstOffsetInDWORDs() {return m_StartOpIndex;} UINT LastInstOffsetInBytes() {return LastInstOffsetInDWORDs() * sizeof(*m_dwFunc);} // This function should be called to mark the start of a shader void StartShader(D3D10_SB_TOKENIZED_PROGRAM_TYPE ShaderType, UINT vermajor,UINT verminor) { Reset(); UINT Token = ENCODE_D3D10_SB_TOKENIZED_PROGRAM_VERSION_TOKEN(ShaderType, vermajor, verminor); OPCODE(Token); OPCODE(0); // Reserve space for 
length } // Should be called at the end of the shader void EndShader() { if (1 < m_BufferSize) m_dwFunc[1] = ENCODE_D3D10_SB_TOKENIZED_PROGRAM_LENGTH(m_Index); } // Emit a resource declaration void EmitResourceDecl(D3D10_SB_RESOURCE_DIMENSION Dimension, UINT TRegIndex, D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForX, D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForY, D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForZ, D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForW) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_RESOURCE) | ENCODE_D3D10_SB_RESOURCE_DIMENSION(Dimension) ); EmitOperand(COperand(D3D10_SB_OPERAND_TYPE_RESOURCE, TRegIndex)); FUNC(ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForX, 0) | ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForY, 1) | ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForZ, 2) | ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForW, 3)); ENDINSTRUCTION(); } // Emit a resource declaration (multisampled) void EmitResourceMSDecl(D3D10_SB_RESOURCE_DIMENSION Dimension, UINT TRegIndex, D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForX, D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForY, D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForZ, D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForW, UINT SampleCount) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_RESOURCE) | ENCODE_D3D10_SB_RESOURCE_DIMENSION(Dimension) | ENCODE_D3D10_SB_RESOURCE_SAMPLE_COUNT(SampleCount)); EmitOperand(COperand(D3D10_SB_OPERAND_TYPE_RESOURCE, TRegIndex)); FUNC(ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForX, 0) | ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForY, 1) | ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForZ, 2) | ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForW, 3)); ENDINSTRUCTION(); } // Emit a sampler declaration void EmitSamplerDecl(UINT SRegIndex, D3D10_SB_SAMPLER_MODE Mode) { m_bExecutableInstruction = FALSE; OPCODE( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_SAMPLER) | 
ENCODE_D3D10_SB_SAMPLER_MODE(Mode) ); EmitOperand(COperand(D3D10_SB_OPERAND_TYPE_SAMPLER, SRegIndex)); ENDINSTRUCTION(); } // Emit a stream declaration void EmitStreamDecl(UINT SRegIndex) { m_bExecutableInstruction = FALSE; OPCODE( ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_STREAM) ); EmitOperand(COperand(D3D11_SB_OPERAND_TYPE_STREAM, SRegIndex)); ENDINSTRUCTION(); } // Emit an input declaration void EmitInputDecl(D3D10_SB_OPERAND_TYPE RegType, UINT RegIndex, UINT WriteMask, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT)); EmitOperand(COperandDst(RegType, RegIndex, WriteMask, MinPrecision)); ENDINSTRUCTION(); } void EmitInputDecl2D(D3D10_SB_OPERAND_TYPE RegType, UINT RegIndex, UINT RegIndex2, UINT WriteMask, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT)); EmitOperand(COperandDst(RegType, RegIndex, RegIndex2, WriteMask, MinPrecision)); ENDINSTRUCTION(); } // Emit an input declaration for a system interpreted value void EmitInputSystemInterpretedValueDecl(D3D10_SB_OPERAND_TYPE RegType, UINT RegIndex, UINT WriteMask, D3D10_SB_NAME Name, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_SIV)); EmitOperand(COperandDst(RegType, RegIndex, WriteMask, MinPrecision)); FUNC(ENCODE_D3D10_SB_NAME(Name)); ENDINSTRUCTION(); } void EmitInputSystemInterpretedValueDecl2D(UINT RegIndex, UINT RegIndex2, UINT WriteMask, D3D10_SB_NAME Name, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_SIV)); EmitOperand(COperandDst(D3D10_SB_OPERAND_TYPE_INPUT, 
RegIndex, RegIndex2, WriteMask, MinPrecision)); FUNC(ENCODE_D3D10_SB_NAME(Name)); ENDINSTRUCTION(); } // Emit an input declaration for a system generated value void EmitInputSystemGeneratedValueDecl(UINT RegIndex, UINT WriteMask, D3D10_SB_NAME Name, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_SGV)); EmitOperand(COperandDst(D3D10_SB_OPERAND_TYPE_INPUT, RegIndex, WriteMask, MinPrecision)); FUNC(ENCODE_D3D10_SB_NAME(Name)); ENDINSTRUCTION(); } void EmitInputSystemGeneratedValueDecl2D(UINT RegIndex, UINT RegIndex2, UINT WriteMask, D3D10_SB_NAME Name, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_SGV)); EmitOperand(COperandDst(D3D10_SB_OPERAND_TYPE_INPUT, RegIndex, RegIndex2, WriteMask, MinPrecision)); FUNC(ENCODE_D3D10_SB_NAME(Name)); ENDINSTRUCTION(); } // Emit a PS input declaration void EmitPSInputDecl(UINT RegIndex, UINT WriteMask, D3D10_SB_INTERPOLATION_MODE Mode, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_PS) | ENCODE_D3D10_SB_INPUT_INTERPOLATION_MODE(Mode)); EmitOperand(COperandDst(D3D10_SB_OPERAND_TYPE_INPUT, RegIndex, WriteMask, MinPrecision)); ENDINSTRUCTION(); } // Emit a PS input declaration for a system interpreted value void EmitPSInputSystemInterpretedValueDecl(UINT RegIndex, UINT WriteMask, D3D10_SB_INTERPOLATION_MODE Mode, D3D10_SB_NAME Name, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_PS_SIV) | ENCODE_D3D10_SB_INPUT_INTERPOLATION_MODE(Mode)); EmitOperand(COperandDst(D3D10_SB_OPERAND_TYPE_INPUT, RegIndex, 
WriteMask, MinPrecision)); FUNC(ENCODE_D3D10_SB_NAME(Name)); ENDINSTRUCTION(); } // Emit a PS input declaration for a system generated value void EmitPSInputSystemGeneratedValueDecl(UINT RegIndex, UINT WriteMask, D3D10_SB_INTERPOLATION_MODE Mode, D3D10_SB_NAME Name, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_PS_SGV) | ENCODE_D3D10_SB_INPUT_INTERPOLATION_MODE(Mode)); EmitOperand(COperandDst(D3D10_SB_OPERAND_TYPE_INPUT, RegIndex, WriteMask, MinPrecision)); FUNC(ENCODE_D3D10_SB_NAME(Name)); ENDINSTRUCTION(); } // Emit input coverage mask declaration void EmitInputCoverageMaskDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT)); EmitOperand(COperand(D3D11_SB_OPERAND_TYPE_INPUT_COVERAGE_MASK, MinPrecision)); ENDINSTRUCTION(); } // Emit cycle counter decl void EmitCycleCounterDecl(UINT WriteMask) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT)); EmitOperand(COperandDst(WriteMask,D3D11_SB_OPERAND_TYPE_CYCLE_COUNTER)); ENDINSTRUCTION(); } // Emit input primitive id declaration void EmitInputPrimIdDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT)); EmitOperand(COperandDst(D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID, MinPrecision)); ENDINSTRUCTION(); } // Emit input domain point declaration void EmitInputDomainPointDecl(UINT WriteMask, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT)); EmitOperand(COperandDst(WriteMask,D3D11_SB_OPERAND_TYPE_INPUT_DOMAIN_POINT, MinPrecision)); 
ENDINSTRUCTION(); } // Emit and oDepth declaration void EmitODepthDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT)); EmitOperand(COperandDst(D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH, MinPrecision)); ENDINSTRUCTION(); } // Emit and oDepthGE declaration void EmitODepthDeclGE(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT)); EmitOperand(COperandDst(D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL, MinPrecision)); ENDINSTRUCTION(); } // Emit and oDepthLE declaration void EmitODepthDeclLE(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT)); EmitOperand(COperandDst(D3D11_SB_OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL, MinPrecision)); ENDINSTRUCTION(); } // Emit an oMask declaration void EmitOMaskDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT)); EmitOperand(COperandDst(D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK, MinPrecision)); ENDINSTRUCTION(); } // Emit an output declaration void EmitOutputDecl(UINT RegIndex, UINT WriteMask, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT)); EmitOperand(COperandDst(D3D10_SB_OPERAND_TYPE_OUTPUT, RegIndex, WriteMask, MinPrecision)); ENDINSTRUCTION(); } // Emit an output declaration for a system interpreted value void EmitOutputSystemInterpretedValueDecl(UINT RegIndex, UINT WriteMask, D3D10_SB_NAME Name, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = 
D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT_SIV)); EmitOperand(COperandDst(D3D10_SB_OPERAND_TYPE_OUTPUT, RegIndex, WriteMask, MinPrecision)); FUNC(ENCODE_D3D10_SB_NAME(Name)); ENDINSTRUCTION(); } // Emit an output declaration for a system generated value void EmitOutputSystemGeneratedValueDecl(UINT RegIndex, UINT WriteMask, D3D10_SB_NAME Name, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_OUTPUT_SGV)); EmitOperand(COperandDst(D3D10_SB_OPERAND_TYPE_OUTPUT, RegIndex, WriteMask, MinPrecision)); FUNC(ENCODE_D3D10_SB_NAME(Name)); ENDINSTRUCTION(); } // Emit an input register indexing range declaration void EmitInputIndexingRangeDecl(UINT RegIndex, UINT Count, UINT WriteMask) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEX_RANGE)); EmitOperand(COperandDst(D3D10_SB_OPERAND_TYPE_INPUT, RegIndex, WriteMask)); FUNC((UINT)Count); ENDINSTRUCTION(); } // 2D indexing range decl (indexing is for second dimension) void EmitInputIndexingRangeDecl2D(UINT RegIndex, UINT RegIndex2Min, UINT Reg2Count, UINT WriteMask) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEX_RANGE)); EmitOperand(COperandDst(D3D10_SB_OPERAND_TYPE_INPUT, RegIndex, RegIndex2Min, WriteMask)); FUNC((UINT)Reg2Count); ENDINSTRUCTION(); } // Emit an output register indexing range declaration void EmitOutputIndexingRangeDecl(UINT RegIndex, UINT Count, UINT WriteMask) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEX_RANGE)); EmitOperand(COperandDst(D3D10_SB_OPERAND_TYPE_OUTPUT, RegIndex, WriteMask)); FUNC((UINT)Count); ENDINSTRUCTION(); } // Emit indexing range decl taking reg type as parameter // (for things other than plain input or output regs) void 
EmitIndexingRangeDecl(D3D10_SB_OPERAND_TYPE RegType, UINT RegIndex, UINT Count, UINT WriteMask) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEX_RANGE)); EmitOperand(COperandDst(RegType, RegIndex, WriteMask)); FUNC((UINT)Count); ENDINSTRUCTION(); } // 2D indexing range decl (indexing is for second dimension) // Emit indexing range decl taking reg type as parameter // (for things other than plain input or output regs) void EmitIndexingRangeDecl2D(D3D10_SB_OPERAND_TYPE RegType, UINT RegIndex, UINT RegIndex2Min, UINT Reg2Count, UINT WriteMask) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEX_RANGE)); EmitOperand(COperandDst(RegType, RegIndex, RegIndex2Min, WriteMask)); FUNC((UINT)Reg2Count); ENDINSTRUCTION(); } // Emit a temp registers ( r0...r(n-1) ) declaration void EmitTempsDecl(UINT NumTemps) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_TEMPS)); FUNC((UINT)NumTemps); ENDINSTRUCTION(); } // Emit an indexable temp register (x#) declaration void EmitIndexableTempDecl(UINT TempNumber, UINT RegCount, UINT ComponentCount ) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP)); FUNC((UINT)TempNumber); FUNC((UINT)RegCount); FUNC((UINT)ComponentCount); ENDINSTRUCTION(); } // Emit a constant buffer (cb#) declaration void EmitConstantBufferDecl(UINT RegIndex, UINT Size, // size 0 means unknown/any size D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN AccessPattern) { m_bExecutableInstruction = FALSE; OPCODE( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) | ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(AccessPattern)); EmitOperand(COperand2D(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, RegIndex, Size)); ENDINSTRUCTION(); } // Emit Immediate Constant Buffer (icb) declaration void EmitImmediateConstantBufferDecl(UINT Num4Tuples, const UINT* pImmediateConstantBufferData) { 
m_bExecutableInstruction = FALSE; EmitCustomData( D3D10_SB_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER, 4*Num4Tuples /*2 UINTS will be added during encoding */, pImmediateConstantBufferData); } // Emit a GS input primitive declaration void EmitGSInputPrimitiveDecl(D3D10_SB_PRIMITIVE Primitive) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE) | ENCODE_D3D10_SB_GS_INPUT_PRIMITIVE(Primitive)); ENDINSTRUCTION(); } // Emit a GS output topology declaration void EmitGSOutputTopologyDecl(D3D10_SB_PRIMITIVE_TOPOLOGY Topology) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY) | ENCODE_D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY(Topology)); ENDINSTRUCTION(); } // Emit GS Maximum Output Vertex Count declaration void EmitGSMaxOutputVertexCountDecl(UINT Count) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT)); FUNC((UINT)Count); ENDINSTRUCTION(); } // Emit input GS instance count declaration void EmitInputGSInstanceCountDecl( UINT Instances ) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_GS_INSTANCE_COUNT)); FUNC(Instances); ENDINSTRUCTION(); } // Emit input GS instance ID declaration void EmitInputGSInstanceIDDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT)); EmitOperand(COperandDst(D3D11_SB_OPERAND_TYPE_INPUT_GS_INSTANCE_ID, MinPrecision)); ENDINSTRUCTION(); } // Emit global flags declaration void EmitGlobalFlagsDecl(UINT Flags) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS) | ENCODE_D3D10_SB_GLOBAL_FLAGS(Flags)); ENDINSTRUCTION(); } // Emit interface function body declaration void EmitFunctionBodyDecl(UINT uFunctionID) { m_bExecutableInstruction = 
FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_FUNCTION_BODY)); FUNC(uFunctionID); ENDINSTRUCTION(); } void EmitFunctionTableDecl(UINT uFunctionTableID, UINT uTableSize, UINT *pTableEntries) { m_bExecutableInstruction = FALSE; bool bExtended = (3 + uTableSize) > MAX_INSTRUCTION_LENGTH; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_FUNCTION_TABLE) | ENCODE_D3D10_SB_OPCODE_EXTENDED(bExtended)); if( bExtended ) FUNC(0); FUNC(uFunctionTableID); FUNC(uTableSize); if( m_Index + uTableSize >= m_BufferSize ) { Reserve(uTableSize); } memcpy(&m_dwFunc[m_Index],pTableEntries,sizeof(UINT)*uTableSize); m_Index += uTableSize; ENDLONGINSTRUCTION(bExtended); } void EmitInterfaceDecl(UINT uInterfaceID, bool bDynamicIndexed, UINT uArrayLength, UINT uExpectedTableSize, __in_range(0, D3D11_SB_MAX_NUM_TYPES) UINT uNumTypes, __in_ecount(uNumTypes) UINT *pTableEntries) { m_bExecutableInstruction = FALSE; bool bExtended = (4 + uNumTypes) > MAX_INSTRUCTION_LENGTH; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_INTERFACE) | ENCODE_D3D11_SB_INTERFACE_INDEXED_BIT(bDynamicIndexed) | ENCODE_D3D10_SB_OPCODE_EXTENDED(bExtended)); if( bExtended ) FUNC(0); FUNC(uInterfaceID); FUNC(uExpectedTableSize); FUNC(ENCODE_D3D11_SB_INTERFACE_TABLE_LENGTH(uNumTypes) | ENCODE_D3D11_SB_INTERFACE_ARRAY_LENGTH(uArrayLength)); if( m_Index + uNumTypes >= m_BufferSize ) { Reserve(uNumTypes); } memcpy(&m_dwFunc[m_Index],pTableEntries,sizeof(UINT)*uNumTypes); m_Index += uNumTypes; ENDLONGINSTRUCTION(bExtended); } void EmitInterfaceCall(COperandBase &InterfaceOperand, UINT uFunctionIndex) { m_bExecutableInstruction = TRUE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_INTERFACE_CALL)); FUNC(uFunctionIndex); EmitOperand(InterfaceOperand); ENDINSTRUCTION(); } void EmitInputControlPointCountDecl(UINT Count) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT) | ENCODE_D3D11_SB_INPUT_CONTROL_POINT_COUNT(Count)); 
ENDINSTRUCTION(); } void EmitOutputControlPointCountDecl(UINT Count) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT) | ENCODE_D3D11_SB_OUTPUT_CONTROL_POINT_COUNT(Count)); ENDINSTRUCTION(); } void EmitTessellatorDomainDecl(D3D11_SB_TESSELLATOR_DOMAIN Domain) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_TESS_DOMAIN) | ENCODE_D3D11_SB_TESS_DOMAIN(Domain)); ENDINSTRUCTION(); } void EmitTessellatorPartitioningDecl(D3D11_SB_TESSELLATOR_PARTITIONING Partitioning) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_TESS_PARTITIONING) | ENCODE_D3D11_SB_TESS_PARTITIONING(Partitioning)); ENDINSTRUCTION(); } void EmitTessellatorOutputPrimitiveDecl(D3D11_SB_TESSELLATOR_OUTPUT_PRIMITIVE OutputPrimitive) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE) | ENCODE_D3D11_SB_TESS_OUTPUT_PRIMITIVE(OutputPrimitive)); ENDINSTRUCTION(); } void EmitHSMaxTessFactorDecl(float MaxTessFactor) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_HS_MAX_TESSFACTOR)); UINT uTemp = *(UINT*)&MaxTessFactor; FUNC(uTemp); ENDINSTRUCTION(); } void EmitHSForkPhaseInstanceCountDecl(UINT InstanceCount) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT)); FUNC(InstanceCount); ENDINSTRUCTION(); } void EmitHSJoinPhaseInstanceCountDecl(UINT InstanceCount) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT)); FUNC(InstanceCount); ENDINSTRUCTION(); } void EmitHSBeginPhase(D3D10_SB_OPCODE_TYPE Phase) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(Phase)); ENDINSTRUCTION(); } void EmitInputOutputControlPointIDDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) 
{ m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT)); EmitOperand(COperandDst(D3D11_SB_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID, MinPrecision)); ENDINSTRUCTION(); } void EmitInputForkInstanceIDDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT)); EmitOperand(COperandDst(D3D11_SB_OPERAND_TYPE_INPUT_FORK_INSTANCE_ID, MinPrecision)); ENDINSTRUCTION(); } void EmitInputJoinInstanceIDDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT)); EmitOperand(COperandDst(D3D11_SB_OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID, MinPrecision)); ENDINSTRUCTION(); } void EmitThreadGroupDecl(UINT x, UINT y, UINT z) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_THREAD_GROUP)); FUNC(x); FUNC(y); FUNC(z); ENDINSTRUCTION(); } void EmitInputThreadIDDecl(UINT WriteMask, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT)); EmitOperand(COperandDst(WriteMask, D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID, MinPrecision)); ENDINSTRUCTION(); } void EmitInputThreadGroupIDDecl(UINT WriteMask, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT)); EmitOperand(COperandDst(WriteMask, D3D11_SB_OPERAND_TYPE_INPUT_THREAD_GROUP_ID, MinPrecision)); ENDINSTRUCTION(); } void EmitInputThreadIDInGroupDecl(UINT WriteMask, D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT)); 
EmitOperand(COperandDst(WriteMask, D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP, MinPrecision)); ENDINSTRUCTION(); } void EmitInputThreadIDInGroupFlattenedDecl(D3D11_SB_OPERAND_MIN_PRECISION MinPrecision = D3D11_SB_OPERAND_MIN_PRECISION_DEFAULT) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT)); EmitOperand(COperandDst(D3D11_SB_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED, MinPrecision)); ENDINSTRUCTION(); } void EmitTypedUnorderedAccessViewDecl(D3D10_SB_RESOURCE_DIMENSION Dimension, UINT URegIndex, D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForX, D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForY, D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForZ, D3D10_SB_RESOURCE_RETURN_TYPE ReturnTypeForW, UINT Flags) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) | ENCODE_D3D10_SB_RESOURCE_DIMENSION(Dimension) | ENCODE_D3D11_SB_ACCESS_COHERENCY_FLAGS(Flags)); EmitOperand(COperand(D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, URegIndex)); FUNC(ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForX, 0) | ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForY, 1) | ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForZ, 2) | ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(ReturnTypeForW, 3)); ENDINSTRUCTION(); } void EmitRawUnorderedAccessViewDecl(UINT URegIndex, UINT Flags) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW) | ENCODE_D3D11_SB_ACCESS_COHERENCY_FLAGS(Flags)); EmitOperand(COperand(D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, URegIndex)); ENDINSTRUCTION(); } void EmitStructuredUnorderedAccessViewDecl(UINT URegIndex, UINT ByteStride, UINT Flags ) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED)| ENCODE_D3D11_SB_ACCESS_COHERENCY_FLAGS(Flags)| ENCODE_D3D11_SB_UAV_FLAGS(Flags)); // (same variable since both sets of flags are in the same 
space) EmitOperand(COperand(D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, URegIndex)); FUNC(ByteStride); ENDINSTRUCTION(); } void EmitRawThreadGroupSharedMemoryDecl(UINT GRegIndex, UINT ByteCount ) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW)); EmitOperand(COperand(D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY, GRegIndex)); FUNC(ByteCount); ENDINSTRUCTION(); } void EmitStructuredThreadGroupSharedMemoryDecl(UINT GRegIndex, UINT ByteStride, UINT StructCount ) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED)); EmitOperand(COperand(D3D11_SB_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY, GRegIndex)); FUNC(ByteStride); FUNC(StructCount); ENDINSTRUCTION(); } void EmitRawShaderResourceViewDecl(UINT TRegIndex) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_RESOURCE_RAW)); EmitOperand(COperand(D3D10_SB_OPERAND_TYPE_RESOURCE, TRegIndex)); ENDINSTRUCTION(); } void EmitStructuredShaderResourceViewDecl(UINT TRegIndex, UINT ByteStride) { m_bExecutableInstruction = FALSE; OPCODE(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED)); EmitOperand(COperand(D3D10_SB_OPERAND_TYPE_RESOURCE, TRegIndex)); FUNC(ByteStride); ENDINSTRUCTION(); } // Emit an instruction. Custom-data is not handled by this function. 
void EmitInstruction(const CInstruction& instruction); // Emit an operand void EmitOperand(const COperandBase& operand); // Emit an instruction without operands void Emit(UINT OpCode) { OPCODE(OpCode); ENDINSTRUCTION(); } void StartComplexEmit(UINT OpCode, UINT ReserveCount = MAX_INSTRUCTION_LENGTH) { OPCODE(OpCode); Reserve(ReserveCount); } void AddComplexEmit(UINT Data) { FUNC(Data); } void EndComplexEmit(bool bPatchLength = false) { ENDLONGINSTRUCTION(bPatchLength, !bPatchLength); } UINT GetComplexEmitPosition() { return m_Index; } void UpdateComplexEmitPosition(UINT Pos, UINT Data) { if (Pos < m_BufferSize) { m_dwFunc[Pos] = Data; } } void EmitCustomData( D3D10_SB_CUSTOMDATA_CLASS CustomDataClass, UINT SizeInUINTs /*2 UINTS will be added during encoding */, const UINT* pCustomData) { if( ((m_Index + SizeInUINTs) < m_Index) || // wrap (SizeInUINTs > 0xfffffffd) ) // need to add 2, also 0xffffffff isn't caught above { throw E_FAIL; } UINT FullSizeInUINTs = SizeInUINTs + 2; // include opcode and size if( m_Index + FullSizeInUINTs >= m_BufferSize ) // If custom data is going to overflow the buffer, reserve more memory { Reserve(FullSizeInUINTs); } if (m_Index < m_BufferSize) m_dwFunc[m_Index++] = ENCODE_D3D10_SB_CUSTOMDATA_CLASS(CustomDataClass); if (m_Index < m_BufferSize) m_dwFunc[m_Index++] = FullSizeInUINTs; if (m_Index < m_BufferSize) memcpy(&m_dwFunc[m_Index],pCustomData,sizeof(UINT)*SizeInUINTs); m_Index += SizeInUINTs; if (m_Index >= m_BufferSize) // If custom data is exactly fully filled the buffer, reserve more memory { Reserve(1024); } } // Returns number of executable instructions in the current shader UINT GetNumExecutableInstructions() {return m_NumExecutableInstructions;} protected: void OPCODE(UINT x) { if (m_Index < m_BufferSize) { m_dwFunc[m_Index] = x; m_StartOpIndex = m_Index++; } if (m_Index >= m_BufferSize) Reserve(1024); } // Should be called after end of each instruction void ENDINSTRUCTION() { if (m_StartOpIndex < m_Index) { 
m_dwFunc[m_StartOpIndex] |= ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(m_Index - m_StartOpIndex); Reserve(MAX_INSTRUCTION_LENGTH); m_StatementIndex++; if (m_bExecutableInstruction) m_NumExecutableInstructions++; m_bExecutableInstruction = true; } } void ENDLONGINSTRUCTION(bool bExtendedLength, bool bBaseLength = true) { if (m_StartOpIndex < m_Index) { if (bBaseLength) { m_dwFunc[m_StartOpIndex] |= ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(m_Index - m_StartOpIndex); } if( bExtendedLength ) { __analysis_assume(m_StartOpIndex + 1 < m_Index); m_dwFunc[m_StartOpIndex + 1] = m_Index - m_StartOpIndex; } Reserve(MAX_INSTRUCTION_LENGTH); m_StatementIndex++; if (m_bExecutableInstruction) m_NumExecutableInstructions++; m_bExecutableInstruction = true; } } void FUNC(UINT x) { if (m_Index < m_BufferSize) m_dwFunc[m_Index++] = x; if (m_Index >= m_BufferSize) Reserve(1024); } // Prepare assembler for a new shader void Reset() { m_Index = 0; m_StartOpIndex = 0; m_StatementIndex = 1; m_NumExecutableInstructions = 0; m_bExecutableInstruction = TRUE; } // Reserve SizeInUINTs UINTs in the m_dwFunc array void Reserve(UINT SizeInUINTs) { if( m_Index + SizeInUINTs < m_Index ) // overflow (prefix) { throw E_FAIL; } if (m_BufferSize < (m_Index + SizeInUINTs)) { UINT NewSize = m_BufferSize + SizeInUINTs + 1024; UINT* pNewBuffer = (UINT*)malloc(NewSize*sizeof(UINT)); if (pNewBuffer == NULL) { throw E_OUTOFMEMORY; } memcpy(pNewBuffer, m_dwFunc, sizeof(UINT)*m_Index); free(m_dwFunc); m_dwFunc = pNewBuffer; m_BufferSize = NewSize; } } // Buffer where the binary representation is built __field_ecount_part(m_BufferSize, m_Index) UINT* m_dwFunc; // Index where to place the next token in the m_dwFunc array UINT m_Index; // Index of the start of the current instruction in the m_dwFunc array UINT m_StartOpIndex; // Current buffer size in UINTs UINT m_BufferSize; // Current statement index of the current vertex shader UINT m_StatementIndex; // Number of executable instructions in the shader UINT 
m_NumExecutableInstructions; // "true" when the current instruction is executable bool m_bExecutableInstruction; }; //***************************************************************************** // // CShaderCodeParser // //***************************************************************************** class CShaderCodeParser { public: CShaderCodeParser(): m_pShaderCode(NULL), m_pCurrentToken(NULL), m_pShaderEndToken(NULL) { InitInstructionInfo(); } CShaderCodeParser(CONST CShaderToken* pBuffer): m_pShaderCode(NULL), m_pCurrentToken(NULL), m_pShaderEndToken(NULL) { InitInstructionInfo(); SetShader(pBuffer); } ~CShaderCodeParser() {} void SetShader(CONST CShaderToken* pBuffer); void ParseInstruction(CInstruction* pInstruction); void ParseIndex(COperandIndex* pOperandIndex, D3D10_SB_OPERAND_INDEX_REPRESENTATION IndexType); void ParseOperand(COperandBase* pOperand); BOOL EndOfShader() {return m_pCurrentToken >= m_pShaderEndToken;} D3D10_SB_TOKENIZED_PROGRAM_TYPE ShaderType(); UINT ShaderMinorVersion(); UINT ShaderMajorVersion(); UINT ShaderLengthInTokens(); UINT CurrentTokenOffset(); UINT CurrentTokenOffsetInBytes() { return CurrentTokenOffset() * sizeof(CShaderToken); } CONST CShaderToken* ParseOperandAt(COperandBase* pOperand, CONST CShaderToken* pBuffer, CONST CShaderToken* pBufferEnd) { CShaderToken* pCurTok = m_pCurrentToken; CShaderToken* pEndTok = m_pShaderEndToken; CShaderToken* pRet; m_pCurrentToken = (CShaderToken*)pBuffer; m_pShaderEndToken = (CShaderToken*)pBufferEnd; ParseOperand(pOperand); pRet = m_pCurrentToken; m_pCurrentToken = pCurTok; m_pShaderEndToken = pEndTok; return pRet; } protected: CShaderToken* m_pCurrentToken; CShaderToken* m_pShaderCode; // Points to the last token of the current shader CShaderToken* m_pShaderEndToken; }; }; // name space D3D10ShaderBinary #endif // _SHADERBINARY_H // End of file : ShaderBinary.h ================================================ FILE: include/SharedResourceHelpers.hpp 
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once

namespace D3D12TranslationLayer
{
typedef UINT SharedResourceLocalHandle;

// Per-resource private driver data: stores the deferred-destruction type
// given at construction.
class SOpenResourcePrivateData
{
public:
    SOpenResourcePrivateData(DeferredDestructionType deferredDestructionType) :
        m_deferredDestructionType(deferredDestructionType) {}

    DeferredDestructionType GetDeferredDestructionType() const { return m_deferredDestructionType; }

private:
    DeferredDestructionType m_deferredDestructionType;
};

// Maps SharedResourceLocalHandle values to resources and opens resources from
// such handles. Implementations live outside this header.
class SharedResourceHelpers
{
public:
    struct CreationFlags
    {
        bool SupportDisplayableTextures;
    };
    SharedResourceHelpers(ImmediateContext& ImmCtx, CreationFlags const&) noexcept;

    // Size of the private-driver-data blob handled by
    // InitializePrivateDriverData / OpenResourceFromKmtHandle.
    static const UINT cPrivateResourceDriverDataSize = sizeof(SOpenResourcePrivateData);

    void TRANSLATION_API InitializePrivateDriverData(DeferredDestructionType destructionType, _Out_writes_bytes_(dataSize) void* pResourcePrivateDriverData, _In_ UINT dataSize);

    SharedResourceLocalHandle TRANSLATION_API CreateKMTHandle(_In_ HANDLE resourceHandle);
    SharedResourceLocalHandle TRANSLATION_API CreateKMTHandle(_In_ IUnknown* pResource);

    IUnknown* TRANSLATION_API QueryResourceFromKMTHandle(SharedResourceLocalHandle handle);
    void TRANSLATION_API QueryResourceInfoFromKMTHandle(SharedResourceLocalHandle handle, _In_opt_ const D3D11_RESOURCE_FLAGS* pOverrideFlags, _Out_ ResourceInfo* pResourceInfo);
    void TRANSLATION_API DestroyKMTHandle(SharedResourceLocalHandle handle);
    IUnknown* TRANSLATION_API DetachKMTHandle(SharedResourceLocalHandle handle);

    // NOTE(review): the template argument of unique_comptr was missing in
    // this copy of the file; <Resource> is inferred - confirm.
    unique_comptr<Resource> TRANSLATION_API OpenResourceFromKmtHandle(
        ResourceCreationArgs& createArgs,
        _In_ SharedResourceLocalHandle kmtHandle,
        _In_reads_bytes_(dataSize) void* pResourcePrivateDriverData,
        _In_ UINT dataSize,
        _In_ D3D12_RESOURCE_STATES currentState);

private:
    ImmediateContext& m_ImmCtx;
    const CreationFlags m_CreationFlags;

    SharedResourceLocalHandle GetHandleForResource(_In_ IUnknown* pResource) noexcept(false);
    IUnknown* GetResourceFromHandle(_In_ SharedResourceLocalHandle handle) noexcept(false);

    // NOTE(review): the element type was missing in this copy of the file;
    // IUnknown* is inferred from the two accessors above - confirm.
    std::vector<IUnknown*> m_OpenResourceMap;
    std::mutex m_OpenResourceMapLock;
};
}

================================================
FILE: include/SubresourceHelpers.hpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once

namespace D3D12TranslationLayer
{
// Tag type selecting the buffer-view constructors below.
struct CBufferView {};

// A range of subresources expressed as begin/end pairs over array slices,
// mip levels and planes. For arrays and mips, End - Begin is the count
// (see the member comments).
class CSubresourceSubset
{
public:
    CSubresourceSubset() noexcept {}
    explicit CSubresourceSubset(UINT8 NumMips, UINT16 NumArraySlices, UINT8 NumPlanes, UINT8 FirstMip = 0, UINT16 FirstArraySlice = 0, UINT8 FirstPlane = 0) noexcept;
    explicit CSubresourceSubset(const CBufferView&);
    explicit CSubresourceSubset(const D3D11_SHADER_RESOURCE_VIEW_DESC1&) noexcept;
    explicit CSubresourceSubset(const D3D11_UNORDERED_ACCESS_VIEW_DESC1&) noexcept;
    explicit CSubresourceSubset(const D3D11_RENDER_TARGET_VIEW_DESC1&) noexcept;
    explicit CSubresourceSubset(const D3D11_DEPTH_STENCIL_VIEW_DESC&) noexcept;
    explicit CSubresourceSubset(const D3D12_SHADER_RESOURCE_VIEW_DESC&) noexcept;
    explicit CSubresourceSubset(const D3D12_UNORDERED_ACCESS_VIEW_DESC&) noexcept;
    explicit CSubresourceSubset(const D3D12_RENDER_TARGET_VIEW_DESC&) noexcept;
    explicit CSubresourceSubset(const D3D12_DEPTH_STENCIL_VIEW_DESC&) noexcept;
    explicit CSubresourceSubset(const D3D11_VIDEO_DECODER_OUTPUT_VIEW_DESC&, DXGI_FORMAT ResourceFormat) noexcept;
    explicit CSubresourceSubset(const D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC&, DXGI_FORMAT ResourceFormat) noexcept;
    explicit CSubresourceSubset(const D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC&, DXGI_FORMAT ResourceFormat) noexcept;
    explicit CSubresourceSubset(const VIDEO_DECODER_OUTPUT_VIEW_DESC_INTERNAL&) noexcept;
    explicit CSubresourceSubset(const VIDEO_PROCESSOR_INPUT_VIEW_DESC_INTERNAL&) noexcept;
    explicit CSubresourceSubset(const VIDEO_PROCESSOR_OUTPUT_VIEW_DESC_INTERNAL&) noexcept;

    SIZE_T DoesNotOverlap(const CSubresourceSubset&) const noexcept;
    UINT Mask() const noexcept; // Only useable/used when the result will fit in 32 bits.

    UINT NumNonExtendedSubresources() const noexcept;
    UINT NumExtendedSubresources() const noexcept;

public:
    UINT16 m_BeginArray; // Also used to store Tex3D slices.
    UINT16 m_EndArray; // End - Begin == Array Slices
    UINT8 m_BeginMip;
    UINT8 m_EndMip; // End - Begin == Mip Levels
    UINT8 m_BeginPlane;
    UINT8 m_EndPlane;
};

// Splits a plane-less subresource index into mip level and array slice
// (index == ArraySlice * NumMips + MipLevel). NumMips must be non-zero.
inline void DecomposeSubresourceIdxNonExtended(UINT Subresource, UINT NumMips, _Out_ UINT& MipLevel, _Out_ UINT& ArraySlice)
{
    MipLevel = Subresource % NumMips;
    ArraySlice = Subresource / NumMips;
}

// Narrow overload of the above for UINT8 mip / UINT16 array-slice outputs.
inline void DecomposeSubresourceIdxNonExtended(UINT Subresource, UINT8 NumMips, _Out_ UINT8& MipLevel, _Out_ UINT16& ArraySlice)
{
    MipLevel = Subresource % NumMips;
    // The cast's target type was missing in this copy of the file; UINT16 is
    // the type of the output parameter.
    ArraySlice = static_cast<UINT16>(Subresource / NumMips);
}

// Forwards to D3D12DecomposeSubresource with caller-chosen output types.
// (The template parameter list was missing in this copy of the file; T, U
// and V are the names the signature already uses.)
template <typename T, typename U, typename V>
inline void DecomposeSubresourceIdxExtended(UINT Subresource, UINT NumMips, UINT ArraySize, _Out_ T& MipLevel, _Out_ U& ArraySlice, _Out_ V& PlaneSlice)
{
    D3D12DecomposeSubresource(Subresource, NumMips, ArraySize, MipLevel, ArraySlice, PlaneSlice);
}

// Returns only the mip level of a subresource index.
inline UINT DecomposeSubresourceIdxExtendedGetMip(UINT Subresource, UINT NumMips)
{
    return Subresource % NumMips;
}

// Forwards to D3D11CalcSubresource.
inline UINT ComposeSubresourceIdxNonExtended(UINT MipLevel, UINT ArraySlice, UINT NumMips)
{
    return D3D11CalcSubresource(MipLevel, ArraySlice, NumMips);
}

// Forwards to D3D12CalcSubresource.
inline UINT ComposeSubresourceIdxExtended(UINT MipLevel, UINT ArraySlice, UINT PlaneSlice, UINT NumMips, UINT ArraySize)
{
    return D3D12CalcSubresource(MipLevel, ArraySlice, PlaneSlice, NumMips, ArraySize);
}

// Builds an index in which the array slice varies slowest, then the plane,
// then the mip level.
inline UINT ComposeSubresourceIdxArrayThenPlane(UINT NumMips, UINT PlaneCount, UINT MipLevel, UINT ArraySlice, UINT PlaneSlice)
{
    return (ArraySlice * PlaneCount * NumMips) + (PlaneSlice * NumMips) + MipLevel;
}

// Offsets a subresource index by PlaneSlice whole planes.
inline UINT ConvertSubresourceIndexAddPlane(UINT Subresource, UINT NumSubresourcesPerPlane, UINT PlaneSlice)
{
    assert(Subresource < NumSubresourcesPerPlane || PlaneSlice == 0);
    return (Subresource + NumSubresourcesPerPlane * PlaneSlice);
}

// Maps a subresource index back into plane 0.
inline UINT ConvertSubresourceIndexRemovePlane(UINT Subresource, UINT NumSubresourcesPerPlane)
{
    return (Subresource % NumSubresourcesPerPlane);
}

// Returns the plane a subresource index belongs to.
inline UINT GetPlaneIdxFromSubresourceIdx(UINT Subresource, UINT NumSubresourcesPerPlane)
{
    return (Subresource / NumSubresourcesPerPlane);
}

// A CSubresourceSubset that also remembers the mip/array/plane counts passed
// in at construction, and can be iterated (see CViewSubresourceIterator).
class CViewSubresourceSubset : public CSubresourceSubset
{
public:
    enum DepthStencilMode { ReadOnly, WriteOnly, ReadOrWrite };

public:
    CViewSubresourceSubset() {}
    explicit CViewSubresourceSubset(CSubresourceSubset const& Subresources, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount);
    explicit CViewSubresourceSubset(const CBufferView&);
    CViewSubresourceSubset(const D3D11_SHADER_RESOURCE_VIEW_DESC1& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount);
    CViewSubresourceSubset(const D3D11_UNORDERED_ACCESS_VIEW_DESC1& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount);
    CViewSubresourceSubset(const D3D11_RENDER_TARGET_VIEW_DESC1& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount);
    CViewSubresourceSubset(const D3D11_DEPTH_STENCIL_VIEW_DESC& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount);
    CViewSubresourceSubset(const D3D12_SHADER_RESOURCE_VIEW_DESC& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount);
    CViewSubresourceSubset(const D3D12_UNORDERED_ACCESS_VIEW_DESC& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount);
    CViewSubresourceSubset(const D3D12_RENDER_TARGET_VIEW_DESC& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount);
    CViewSubresourceSubset(const D3D12_DEPTH_STENCIL_VIEW_DESC& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount, DepthStencilMode DSMode = ReadOrWrite);
    CViewSubresourceSubset(const VIDEO_DECODER_OUTPUT_VIEW_DESC_INTERNAL& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount);
    CViewSubresourceSubset(const VIDEO_PROCESSOR_INPUT_VIEW_DESC_INTERNAL& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount);
    CViewSubresourceSubset(const VIDEO_PROCESSOR_OUTPUT_VIEW_DESC_INTERNAL& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount);

    // (The template parameter list was missing in this copy of the file; T
    // is the name the signature already uses.)
    template <typename T>
    static CViewSubresourceSubset FromView(const T* pView);

public:
    class CViewSubresourceIterator;

public:
    CViewSubresourceIterator begin() const;
    CViewSubresourceIterator end() const;
    bool IsWholeResource() const;
    bool IsEmpty() const;
    UINT ArraySize() const;

    UINT MinSubresource() const;
    UINT MaxSubresource() const;

private:
    void Reduce();

protected:
    UINT8 m_MipLevels;
    UINT16 m_ArraySlices;
    UINT8 m_PlaneCount;
};

// This iterator iterates over contiguous ranges of subresources within a subresource subset. eg:
//
// // For each contiguous subresource range.
// for( CViewSubresourceSubset::CViewSubresourceIterator it = ViewSubset.begin(); it != ViewSubset.end(); ++it )
// {
//      // StartSubresource and EndSubresource members of the iterator describe the contiguous range.
//      for( UINT SubresourceIndex = it.StartSubresource(); SubresourceIndex < it.EndSubresource(); SubresourceIndex++ )
//      {
//          // Action for each subresource within the current range.
// } // } // class CViewSubresourceSubset::CViewSubresourceIterator { public: CViewSubresourceIterator(CViewSubresourceSubset const& SubresourceSet, UINT16 ArraySlice, UINT8 PlaneCount); CViewSubresourceIterator& operator++(); CViewSubresourceIterator& operator--(); bool operator==(CViewSubresourceIterator const& other) const; bool operator!=(CViewSubresourceIterator const& other) const; UINT StartSubresource() const; UINT EndSubresource() const; std::pair operator*() const; private: CViewSubresourceSubset const& m_Subresources; UINT16 m_CurrentArraySlice; UINT8 m_CurrentPlaneSlice; }; // Some helpers for tiled resource "flow" calculations to determine which subresources are affected by a tiled operation void CalcNewTileCoords(D3D11_TILED_RESOURCE_COORDINATE &Coord, UINT &NumTiles, D3D11_SUBRESOURCE_TILING const& SubresourceTiling); class CTileSubresourceSubset { public: CTileSubresourceSubset(const D3D11_TILED_RESOURCE_COORDINATE& StartCoord, const D3D11_TILE_REGION_SIZE& Region, D3D11_RESOURCE_DIMENSION ResDim, _In_reads_(NumStandardMips) const D3D11_SUBRESOURCE_TILING* pSubresourceTilings, UINT MipLevels, UINT NumStandardMips); class CIterator; CIterator begin() const; CIterator end() const; protected: UINT CalcSubresource(UINT SubresourceIdx) const; protected: bool m_bTargetingArraySlices; UINT m_FirstSubresource; UINT m_NumSubresourcesOrArraySlices; UINT m_MipsPerSlice; }; class CTileSubresourceSubset::CIterator { public: CIterator(CTileSubresourceSubset const& TileSubset, UINT SubresourceIdx); CIterator& operator++(); CIterator& operator--(); bool operator==(CIterator const& other) const; bool operator!=(CIterator const& other) const; UINT operator*() const; private: CTileSubresourceSubset const& m_TileSubset; UINT m_SubresourceIdx; }; template< bool Supported> struct ConvertToDescV1Support { static const bool supported = Supported; }; typedef ConvertToDescV1Support ConvertToDescV1NotSupported; typedef ConvertToDescV1Support ConvertToDescV1Supported; template 
struct DescToViewDimension : ConvertToDescV1NotSupported { static const UINT dimensionTexture2D = 0; static const UINT dimensionTexture2DArray = 0; }; template <> struct DescToViewDimension< D3D11_SHADER_RESOURCE_VIEW_DESC1 > : ConvertToDescV1Supported { static const D3D11_SRV_DIMENSION dimensionTexture2D = D3D11_SRV_DIMENSION_TEXTURE2D; static const D3D11_SRV_DIMENSION dimensionTexture2DArray = D3D11_SRV_DIMENSION_TEXTURE2DARRAY; }; template <> struct DescToViewDimension< D3D11_RENDER_TARGET_VIEW_DESC1 > : ConvertToDescV1Supported { static const D3D11_RTV_DIMENSION dimensionTexture2D = D3D11_RTV_DIMENSION_TEXTURE2D; static const D3D11_RTV_DIMENSION dimensionTexture2DArray = D3D11_RTV_DIMENSION_TEXTURE2DARRAY; }; template <> struct DescToViewDimension< D3D11_UNORDERED_ACCESS_VIEW_DESC1 > : ConvertToDescV1Supported { static const D3D11_UAV_DIMENSION dimensionTexture2D = D3D11_UAV_DIMENSION_TEXTURE2D; static const D3D11_UAV_DIMENSION dimensionTexture2DArray = D3D11_UAV_DIMENSION_TEXTURE2DARRAY; }; template< typename T > inline bool IsPow2( T num ) { static_assert(static_cast(-1) > 0, "Signed type passed to IsPow2"); return !(num & (num - 1)); } }; ================================================ FILE: include/SwapChainHelper.hpp ================================================ #pragma once #include "d3dkmthk.h" namespace D3D12TranslationLayer { class SwapChainHelper { public: SwapChainHelper( IDXGISwapChain3* swapChain ); HRESULT StandardPresent( ImmediateContext& context, D3DKMT_PRESENT *pKMTPresent, Resource& presentingResource ); private: IDXGISwapChain3* m_swapChain; //weak-ref }; } ================================================ FILE: include/SwapChainManager.hpp ================================================ #pragma once namespace D3D12TranslationLayer { class SwapChainManager { public: SwapChainManager( D3D12TranslationLayer::ImmediateContext& ImmCtx ); ~SwapChainManager(); IDXGISwapChain3* GetSwapChainForWindow( HWND hwnd, Resource& presentingResource ); 
private: static constexpr UINT BufferCount = 2; D3D12TranslationLayer::ImmediateContext& m_ImmCtx; std::map> m_SwapChains; }; } ================================================ FILE: include/ThreadPool.hpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once class CThreadPoolWork { friend class CThreadPool; private: PTP_WORK m_pWork = nullptr; std::function m_WorkFunction; static void CALLBACK WorkCallback(PTP_CALLBACK_INSTANCE, PVOID Context, PTP_WORK) { CThreadPoolWork* pWork = reinterpret_cast(Context); pWork->m_WorkFunction(); } public: CThreadPoolWork() = default; // Non-copyable, non-movable, must be initialized in place to avoid spurious heap allocations. CThreadPoolWork(CThreadPoolWork const&) = delete; CThreadPoolWork(CThreadPoolWork&&) = delete; CThreadPoolWork& operator=(CThreadPoolWork const&) = delete; CThreadPoolWork& operator=(CThreadPoolWork&&) = delete; void Wait(bool bCancel = true) { if (m_pWork) { WaitForThreadpoolWorkCallbacks(m_pWork, bCancel); CloseThreadpoolWork(m_pWork); m_pWork = nullptr; } } operator bool() { return m_pWork != nullptr; } ~CThreadPoolWork() { Wait(true); } }; class CThreadPool { private: TP_CALLBACK_ENVIRON m_Environment; PTP_POOL m_pPool = nullptr; PTP_CLEANUP_GROUP m_pCleanup = nullptr; bool m_bCancelPendingWorkOnCleanup = true; public: CThreadPool() { InitializeThreadpoolEnvironment(&m_Environment); m_pPool = CreateThreadpool(nullptr); if (!m_pPool) { throw _com_error(HRESULT_FROM_WIN32(GetLastError())); } m_pCleanup = CreateThreadpoolCleanupGroup(); if (!m_pCleanup) { CloseThreadpool(m_pPool); throw _com_error(HRESULT_FROM_WIN32(GetLastError())); } // No more failures SetThreadpoolCallbackPool(&m_Environment, m_pPool); SetThreadpoolCallbackCleanupGroup(&m_Environment, m_pCleanup, nullptr); } ~CThreadPool() { CloseThreadpoolCleanupGroupMembers(m_pCleanup, m_bCancelPendingWorkOnCleanup, nullptr); CloseThreadpoolCleanupGroup(m_pCleanup); 
CloseThreadpool(m_pPool); DestroyThreadpoolEnvironment(&m_Environment); } // Noncopyable, non-movable since m_Environment is not a pointer. CThreadPool(CThreadPool const&) = delete; CThreadPool(CThreadPool&&) = delete; CThreadPool& operator=(CThreadPool const&) = delete; CThreadPool& operator=(CThreadPool&&) = delete; void SetCancelPendingWorkOnCleanup(bool bCancel) { m_bCancelPendingWorkOnCleanup = bCancel; } void QueueThreadpoolWork(CThreadPoolWork& Work, std::function WorkFunction) { if (Work.m_pWork != nullptr) { throw _com_error(E_INVALIDARG); } Work.m_WorkFunction = std::move(WorkFunction); Work.m_pWork = CreateThreadpoolWork(&CThreadPoolWork::WorkCallback, &Work, &m_Environment); if (!Work.m_pWork) { throw _com_error(HRESULT_FROM_WIN32(GetLastError())); } SubmitThreadpoolWork(Work.m_pWork); } }; ================================================ FILE: include/Util.hpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#pragma once namespace D3D12TranslationLayer { #define ASSUME( _exp ) { assert( _exp ); __analysis_assume( _exp ); __assume( _exp ); } class ImmediateContext; class Resource; enum class COMMAND_LIST_TYPE { GRAPHICS = 0, VIDEO_DECODE = 1, VIDEO_PROCESS = 2, MAX_VALID = 3, UNKNOWN = MAX_VALID, }; const UINT COMMAND_LIST_TYPE_GRAPHICS_MASK = (1 << (UINT)COMMAND_LIST_TYPE::GRAPHICS); const UINT COMMAND_LIST_TYPE_VIDEO_DECODE_MASK = (1 << (UINT)COMMAND_LIST_TYPE::VIDEO_DECODE); const UINT COMMAND_LIST_TYPE_VIDEO_PROCESS_MASK = (1 << (UINT)COMMAND_LIST_TYPE::VIDEO_PROCESS); const UINT COMMAND_LIST_TYPE_VIDEO_MASK = COMMAND_LIST_TYPE_VIDEO_DECODE_MASK | COMMAND_LIST_TYPE_VIDEO_PROCESS_MASK; const UINT COMMAND_LIST_TYPE_ALL_MASK = COMMAND_LIST_TYPE_GRAPHICS_MASK | COMMAND_LIST_TYPE_VIDEO_DECODE_MASK | COMMAND_LIST_TYPE_VIDEO_PROCESS_MASK; const UINT COMMAND_LIST_TYPE_UNKNOWN_MASK = (1 << (UINT)COMMAND_LIST_TYPE::UNKNOWN); enum class AllocatorHeapType { None, Upload, Readback, Decoder, }; inline COMMAND_LIST_TYPE CommandListType(AllocatorHeapType HeapType) { if (HeapType == AllocatorHeapType::Decoder) { return COMMAND_LIST_TYPE::VIDEO_DECODE; } else { assert(HeapType != AllocatorHeapType::None); return COMMAND_LIST_TYPE::GRAPHICS; } } inline D3D12_HEAP_TYPE GetD3D12HeapType(AllocatorHeapType HeapType) { assert(HeapType != AllocatorHeapType::None); switch (HeapType) { case AllocatorHeapType::Readback: return D3D12_HEAP_TYPE_READBACK; case AllocatorHeapType::Upload: case AllocatorHeapType::Decoder: default: return D3D12_HEAP_TYPE_UPLOAD; } } // // Converts an HRESULT to an exception. This matches ThrowFailure used // elsewhere in dxg. 
// _At_(return, _When_(FAILED(hr), __analysis_noreturn)) inline void ThrowFailure(HRESULT hr) { if (FAILED(hr)) { throw _com_error(hr); } } _At_(return, _When_(h == nullptr, __analysis_noreturn)) inline void ThrowIfHandleNull(HANDLE h) { if (h == nullptr) { throw _com_error(HRESULT_FROM_WIN32(GetLastError())); } } class SafeHANDLE { public: SafeHANDLE() : m_h(NULL) { } ~SafeHANDLE() { if (m_h) CloseHandle(m_h); } operator HANDLE() const { return m_h; } HANDLE release() { HANDLE h = m_h; m_h = NULL; return h; } HANDLE m_h; }; class ThrowingSafeHandle : public SafeHANDLE { public: ThrowingSafeHandle(HANDLE h) noexcept(false) { if (h == NULL) { ThrowFailure(E_OUTOFMEMORY); } m_h = h; } }; inline void* AlignedHeapAlloc16(size_t size) noexcept { #ifdef _WIN64 return HeapAlloc(GetProcessHeap(), 0, size); #else size_t totalSize = size + 16; void* original = HeapAlloc(GetProcessHeap(), 0, totalSize); if (original == NULL) { return NULL; } const size_t alignedPtr = (reinterpret_cast(original) + 16) & ~(15); assert(alignedPtr > reinterpret_cast(original)); size_t offset = alignedPtr - reinterpret_cast(original); *(reinterpret_cast(alignedPtr) - 1) = static_cast(offset); return reinterpret_cast(alignedPtr); #endif } //HeapAlloc is guaranteed to be 8-byte aligned on x86, and 16-byte aligned on x64 //From windows\directx\dxg\d3d11\D3DCore\Inc\SWCommandList.hpp inline void AlignedHeapFree16(void* p) noexcept { if (p == NULL) return; #ifdef _WIN64 HeapFree(GetProcessHeap(), 0, p); #else char* pChar = reinterpret_cast(p); size_t offset = *(pChar - 1); HeapFree(GetProcessHeap(), 0, pChar - offset); #endif } template< typename T > inline T Align(T uValue, T uAlign) { T uResult; if (IsPow2(uAlign)) { T uMask = uAlign - 1; uResult = (uValue + uMask) & ~uMask; } else { uResult = ((uValue + uAlign - 1) / uAlign) * uAlign; } assert(uResult >= uValue); assert(0 == (uResult % uAlign)); return uResult; } template< typename T > inline T AlignAtLeast(T uValue, T uAlign) { return 
std::max(Align(uValue, uAlign), uAlign); } // Avoid including kernel libraries by adding list implementation here: inline BOOLEAN IsListEmpty(_In_ const LIST_ENTRY * ListHead) { return (BOOLEAN)(ListHead->Flink == ListHead); } inline void InitializeListHead(_Out_ PLIST_ENTRY ListHead) { ListHead->Flink = ListHead->Blink = ListHead; } inline BOOLEAN RemoveEntryList(_In_ PLIST_ENTRY Entry) { PLIST_ENTRY PrevEntry; PLIST_ENTRY NextEntry; NextEntry = Entry->Flink; PrevEntry = Entry->Blink; if ((NextEntry->Blink != Entry) || (PrevEntry->Flink != Entry)) { assert(false); } PrevEntry->Flink = NextEntry; NextEntry->Blink = PrevEntry; return (BOOLEAN)(PrevEntry == NextEntry); } inline void InsertHeadList(_Inout_ PLIST_ENTRY ListHead, _Out_ PLIST_ENTRY Entry) { PLIST_ENTRY NextEntry; NextEntry = ListHead->Flink; Entry->Flink = NextEntry; Entry->Blink = ListHead; if (NextEntry->Blink != ListHead) { assert(false); } NextEntry->Blink = Entry; ListHead->Flink = Entry; return; } inline void InsertTailList(_Inout_ PLIST_ENTRY ListHead, _Out_ __drv_aliasesMem PLIST_ENTRY Entry) { PLIST_ENTRY PrevEntry; PrevEntry = ListHead->Blink; Entry->Flink = ListHead; Entry->Blink = PrevEntry; if (PrevEntry->Flink != ListHead) { assert(false); } PrevEntry->Flink = Entry; ListHead->Blink = Entry; return; } enum EShaderStage : UINT8 { e_PS, e_VS, e_GS, e_HS, e_DS, e_CS, ShaderStageCount, // For UAVs, the EShaderStage enum is used for array indices. 
e_Graphics = 0, e_Compute = 1, UAVStageCount }; //================================================================================================================================== // // unique_comptr, like unique_ptr except for Ref-held // //================================================================================================================================== struct unique_comptr_deleter { template void operator()(T *pUC) const { pUC->Release(); } }; template struct unique_comptr : protected std::unique_ptr { static_assert(std::is_empty::value, "unique_comptr doesn't support stateful deleter."); typedef std::unique_ptr parent_t; using pointer = typename parent_t::pointer; unique_comptr() : parent_t(nullptr) { } explicit unique_comptr(T *p) : parent_t(p) { if (p) { p->AddRef(); } } template unique_comptr(unique_comptr && other) : parent_t(other.release()) { } template unique_comptr& operator=(unique_comptr && other) { parent_t::reset(other.release()); return *this; } unique_comptr& operator=(pointer p) { reset(p); return *this; } unique_comptr& operator=(std::nullptr_t p) { reset(p); return *this; } void reset(pointer p = pointer()) { if (p) { p->AddRef(); } parent_t::reset(p); } void reset(std::nullptr_t p) { parent_t::reset(p); } T** operator&() { assert(*this == nullptr); return reinterpret_cast(this); } T*const* operator&() const { return reinterpret_cast(this); } using parent_t::release; using parent_t::get; using parent_t::operator->; using parent_t::operator*; using parent_t::operator bool; private: unique_comptr& operator=(unique_comptr const&) = delete; unique_comptr(unique_comptr const&) = delete; }; template struct PreallocatedArray { T* const m_pBegin; T* m_pEnd; template PreallocatedArray(UINT ArraySize, void*& Address, TConstructionArgs&&... 
constructionArgs) : m_pBegin(reinterpret_cast(Address)) , m_pEnd(m_pBegin + ArraySize) { // Leave uninitialized otherwise if (!std::is_trivially_constructible::value) { for (T& t : *this) { new (std::addressof(t)) T(std::forward(constructionArgs)...); } } Address = m_pEnd; } ~PreallocatedArray() { clear(); } PreallocatedArray(PreallocatedArray const&) = delete; PreallocatedArray& operator=(PreallocatedArray const&) = delete; void clear() { if (!std::is_trivially_destructible::value) { for (T& t : *this) { t.~T(); } } m_pEnd = m_pBegin; } size_t size() const { return std::distance(m_pBegin, m_pEnd); } bool empty() const { return m_pBegin == m_pEnd; } T* begin() { return m_pBegin; } T const* begin() const { return m_pBegin; } T* end() { return m_pEnd; } T const* end() const { return m_pEnd; } T& operator[](UINT i) { assert(m_pBegin + i < m_pEnd); return m_pBegin[i]; } T const& operator[](UINT i) const { assert(m_pBegin + i < m_pEnd); return m_pBegin[i]; } }; template struct PreallocatedInlineArray { T m_InlineArray[InlineSize]; PreallocatedArray m_Extra; UINT m_Size; template PreallocatedInlineArray(UINT ArraySize, void*& Address, TConstructionArgs&&... constructionArgs) : m_Extra(ArraySize > InlineSize ? ArraySize - InlineSize : 0, Address, std::forward(constructionArgs)...) 
, m_Size(ArraySize) { // Leave uninitialized otherwise if constexpr (!std::is_trivially_constructible::value) { for (size_t i = 0; i < m_Size && i < InlineSize; ++i) { new (std::addressof(m_InlineArray[i])) T(std::forward(constructionArgs)...); } } } ~PreallocatedInlineArray() { clearInline(); } PreallocatedInlineArray(PreallocatedInlineArray const&) = delete; PreallocatedInlineArray& operator=(PreallocatedInlineArray const&) = delete; void clearInline() { if constexpr (!std::is_trivially_destructible::value) { for (UINT i = 0; i < m_Size && i < InlineSize; ++i) { m_InlineArray[i].~T(); } } } void clear() { clearInline(); m_Extra.clear(); m_Size = 0; } size_t size() const { return m_Size; } bool empty() const { return m_Size == 0; } T &operator[](UINT i) { assert(i < m_Size); return i < InlineSize ? m_InlineArray[i] : m_Extra[i - InlineSize]; } T const& operator[](UINT i) const { assert(i < m_Size); return i < InlineSize ? m_InlineArray[i] : m_Extra[i - InlineSize]; } }; #if TRANSLATION_LAYER_DBG enum ED3D11On12DebugFlags { Debug_FlushOnDraw = 0x1, Debug_FlushOnDispatch = 0x2, Debug_FlushOnRender = 0x4, Debug_DisableIncrementalBindings = 0x8, Debug_FlushOnDataUpload = 0x10, Debug_FlushOnCopy = 0x20, Debug_WaitOnFlush = 0x40, Debug_StallExecution = 0x80, }; #endif enum class ResourceAllocationContext { ImmediateContextThreadLongLived, ImmediateContextThreadTemporary, FreeThread, }; UINT GetByteAlignment(DXGI_FORMAT format); inline D3D12_RESOURCE_STATES GetDefaultPoolState(AllocatorHeapType heapType) { switch (heapType) { case AllocatorHeapType::Upload: return D3D12_RESOURCE_STATE_GENERIC_READ; case AllocatorHeapType::Readback: return D3D12_RESOURCE_STATE_COPY_DEST; case AllocatorHeapType::Decoder: default: return D3D12_RESOURCE_STATE_COMMON; } } inline D3D_FEATURE_LEVEL GetHardwareFeatureLevel(ID3D12Device *pDevice) { static const D3D_FEATURE_LEVEL RequestedD3D12FeatureLevels[] = { D3D_FEATURE_LEVEL_12_1, D3D_FEATURE_LEVEL_12_0, D3D_FEATURE_LEVEL_11_1, 
D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_1_0_CORE, }; D3D12_FEATURE_DATA_FEATURE_LEVELS featureLevels = { ARRAYSIZE(RequestedD3D12FeatureLevels), RequestedD3D12FeatureLevels }; ThrowFailure(pDevice->CheckFeatureSupport(D3D12_FEATURE_FEATURE_LEVELS, &featureLevels, sizeof(featureLevels))); return featureLevels.MaxSupportedFeatureLevel; } template inline void SetFeatureDataNodeIndex(void *pFeatureSupportData, UINT FeatureSupportDataSize, UINT NodeIndex) { if (FeatureSupportDataSize != sizeof(T)) { ThrowFailure(E_INVALIDARG); } T *pSupportData = (T*)pFeatureSupportData; pSupportData->NodeIndex = NodeIndex; } template struct ScopeExit { ScopeExit(F &&f) : f(std::forward(f)) {} ~ScopeExit() { f(); } F f; }; template inline ScopeExit MakeScopeExit(F &&f) { return ScopeExit(std::forward(f)); }; template inline void hash_combine(size_t & seed, const T & v) { std::hash hasher; seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); } template class OptLock { mutable std::optional m_Lock; public: std::unique_lock TakeLock() const { return m_Lock ? std::unique_lock(*m_Lock) : std::unique_lock(); } OptLock(bool bHaveLock = false) { if (bHaveLock) { m_Lock.emplace(); } } void EnsureLock() { if (!m_Lock) { m_Lock.emplace(); } } bool HasLock() const { return m_Lock.has_value(); } }; template struct CircularArray { T m_Array[Size]; struct iterator { using difference_type = ptrdiff_t; using value_type = T; using pointer = T*; using reference = T&; using iterator_category = std::random_access_iterator_tag; T* m_Begin; T* m_Current; iterator( T* Begin, T* Current ) : m_Begin( Begin ), m_Current( Current ) {} iterator increment( ptrdiff_t distance ) const { ptrdiff_t totalDistance = (distance + std::distance( m_Begin, m_Current )) % Size; totalDistance = totalDistance >= 0 ? 
totalDistance : totalDistance + Size; return iterator( m_Begin, m_Begin + totalDistance ); } iterator& operator++() { *this = increment( 1 ); return *this; } iterator operator++( int ) { iterator ret = *this; *this = increment( 1 ); return ret; } iterator& operator--() { *this = increment( -1 ); return *this; } iterator operator--( int ) { iterator ret = *this; *this = increment( -1 ); return ret; } iterator operator+( ptrdiff_t v ) { return increment( v ); } iterator& operator+=( ptrdiff_t v ) { *this = increment( v ); return *this; } iterator operator-( ptrdiff_t v ) { return increment( -v ); } iterator& operator-=( ptrdiff_t v ) { *this = increment( -v ); return *this; } bool operator==( iterator const& o ) const { return o.m_Begin == m_Begin && o.m_Current == m_Current; } bool operator!=( iterator const& o ) const { return !(o == *this); } reference operator*() { return *m_Current; } pointer operator->() { return m_Current; } ptrdiff_t operator-( iterator const& o ) const { assert( o.m_Begin == m_Begin ); ptrdiff_t rawDistance = std::distance( o.m_Current, m_Current ); return rawDistance >= 0 ? rawDistance : rawDistance + Size; } }; iterator begin() { return iterator( m_Array, m_Array ); } T& operator[]( size_t index ) { return *(begin() + index); } }; }; ================================================ FILE: include/VideoDecode.hpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once namespace D3D12TranslationLayer { class Resource; constexpr UINT MAX_OUTSTANDING_DECODER_COMPRESSED_BUFFERS = 8; typedef enum { VIDEO_DECODE_CONFIG_SPECIFIC_NONE = 0, VIDEO_DECODE_CONFIG_SPECIFIC_BUFFER_REMAP = 0x5B, // set by accelerator, VC1 only VIDEO_DECODE_CONFIG_SPECIFIC_POST_PROCESSING_OFF = 0x03, // set by accelerator, VC1 only. 
VIDEO_DECODE_CONFIG_SPECIFIC_SP_FRAME = 1 << 7, // set by accelerator, VC1 only (MEDIASUBTYPE_VC1S)
        VIDEO_DECODE_CONFIG_SPECIFIC_ALIGNMENT_HEIGHT = 1 << 12, // set by accelerator
        VIDEO_DECODE_CONFIG_SPECIFIC_DOWNSAMPLING = 1 << 13, // set by host decoder
        VIDEO_DECODE_CONFIG_SPECIFIC_ARRAY_OF_TEXTURES = 1 << 14, // set by accelerator
        VIDEO_DECODE_CONFIG_SPECIFIC_REUSE_DECODER = 1 << 15, // set by accelerator - This bit means that the decoder can be re-used with resolution change and bit depth change (including profile GUID change from 8bit to 10bit and vice versa).
    } VIDEO_DECODE_CONFIG_SPECIFIC_FLAGS;

    // Array index associated with each supported bit depth.
    typedef enum {
        VIDEO_DECODE_PROFILE_BIT_DEPTH_INDEX_8_BIT = 0,
        VIDEO_DECODE_PROFILE_BIT_DEPTH_INDEX_10_BIT = 1,
        VIDEO_DECODE_PROFILE_BIT_DEPTH_INDEX_16_BIT = 2,

        VIDEO_DECODE_PROFILE_BIT_DEPTH_INDEX_MAX, // Keep at end to inform array size.
    } VIDEO_DECODE_PROFILE_BIT_DEPTH_INDEX;

    // Bit-flag form of the bit depths: each flag is 1 shifted by the matching index above.
    typedef enum {
        VIDEO_DECODE_PROFILE_BIT_DEPTH_NONE = 0,
        VIDEO_DECODE_PROFILE_BIT_DEPTH_8_BIT = (1 << VIDEO_DECODE_PROFILE_BIT_DEPTH_INDEX_8_BIT),
        VIDEO_DECODE_PROFILE_BIT_DEPTH_10_BIT = (1 << VIDEO_DECODE_PROFILE_BIT_DEPTH_INDEX_10_BIT),
        VIDEO_DECODE_PROFILE_BIT_DEPTH_16_BIT = (1 << VIDEO_DECODE_PROFILE_BIT_DEPTH_INDEX_16_BIT),
    } VIDEO_DECODE_PROFILE_BIT_DEPTH;

    // Converts a bit-depth flag to its index. The right shift maps the flag values 1, 2 and 4 to
    // 0, 1 and 2; VIDEO_DECODE_PROFILE_BIT_DEPTH_NONE (0) also maps to index 0, and the shift would
    // not equal the bit position for any flag value above 4.
    constexpr VIDEO_DECODE_PROFILE_BIT_DEPTH_INDEX GetIndex(VIDEO_DECODE_PROFILE_BIT_DEPTH BitDepth) { return static_cast(BitDepth >> 1);}

    // Kinds of decoder input buffers; the last enumerator is the number of kinds.
    typedef enum {
        VIDEO_DECODE_BUFFER_TYPE_PICTURE_PARAMETERS,
        VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX,
        VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL,
        VIDEO_DECODE_BUFFER_TYPE_BITSTREAM,
        VIDEO_DECODE_BUFFER_TYPE_SIZEOF
    } VIDEO_DECODE_BUFFER_TYPE;

    // Profile, dimensions and format of a decode session.
    typedef struct {
        GUID DecodeProfile;
        UINT Width;
        UINT Height;
        DXGI_FORMAT DecodeFormat;
    } VIDEO_DECODE_DESC;

    // Decoder configuration: decoder-specific word plus the interlace type.
    typedef struct {
        USHORT ConfigDecoderSpecific;
        D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE InterlaceType;
    } VIDEO_DECODE_CONFIG;

    // Bundle passed to the VideoDecode constructor.
    typedef struct {
        VIDEO_DECODE_DESC Desc;
        VIDEO_DECODE_CONFIG Config;
    } VideoDecodeCreationArgs;

    // Location of the compressed bitstream: buffer resource, offset into it, and size.
    typedef struct VIDEO_DECODE_COMPRESSED_BITSTREAM {
        Resource* pBuffer;
        UINT64 Offset;
        UINT Size;
    } VIDEO_DECODE_COMPRESSED_BITSTREAM;

    // Optional output conversion settings.
    typedef struct VIDEO_DECODE_OUTPUT_CONVERSION_ARGUMENTS {
        BOOL Enable;
        DXGI_COLOR_SPACE_TYPE OutputColorSpace;
        D3D12_VIDEO_SAMPLE ReferenceInfo;
        UINT ReferenceFrameCount;
    } VIDEO_DECODE_OUTPUT_CONVERSION_ARGUMENTS;

    // Decryption inputs; each pointer is annotated with the count/size field that describes it.
    typedef struct VIDEO_DECODE_DECRYPTION_ARGUMENTS {
        _Field_size_opt_(KeyInfoSize) void* pKeyInfo;
        UINT KeyInfoSize;
        _Field_size_(IVSize) const void* pIV;
        UINT IVSize;
        _Field_size_opt_(SubSampleMappingCount) const void*pSubSampleMappingBlock;
        UINT SubSampleMappingCount;
        UINT cBlocksStripeEncrypted;
        UINT cBlocksStripeClear;
    } VIDEO_DECODE_DECRYPTION_ARGUMENTS;

    // Everything DecodeFrame consumes: frame arguments, the compressed bitstream and decryption data.
    typedef struct VIDEO_DECODE_INPUT_STREAM_ARGUMENTS {
        D3D12_VIDEO_DECODE_FRAME_ARGUMENT FrameArguments[D3D12_VIDEO_DECODE_MAX_ARGUMENTS];
        UINT FrameArgumentsCount; // presumably the number of valid FrameArguments entries - confirm
        VIDEO_DECODE_COMPRESSED_BITSTREAM CompressedBitstream;
        VIDEO_DECODE_DECRYPTION_ARGUMENTS DecryptionArguments;
    } VIDEO_DECODE_INPUT_STREAM_ARGUMENTS;

    // Destination of one histogram component: buffer resource and offset into it.
    typedef struct VIDEO_DECODE_COMPONENT_HISTOGRAM {
        UINT64 Offset;
        Resource* pBuffer;
    } VIDEO_DECODE_COMPONENT_HISTOGRAM;

    const UINT VIDEO_DECODE_MAX_HISTOGRAM_COMPONENTS = 4;

    // Everything DecodeFrame produces into: output texture, subresource subset, conversion and histograms.
    typedef struct VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS {
        Resource* pOutputTexture2D;
        CViewSubresourceSubset SubresourceSubset;
        VIDEO_DECODE_OUTPUT_CONVERSION_ARGUMENTS ConversionArguments;
        VIDEO_DECODE_COMPONENT_HISTOGRAM Histograms[VIDEO_DECODE_MAX_HISTOGRAM_COMPONENTS];
    } VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS;

    // Video decoder object. Only the interface is declared here; the member functions without
    // inline bodies are defined elsewhere.
    class VideoDecode : public DeviceChild
    {
    public:
        friend class ImmediateContext;
        VideoDecode(_In_ ImmediateContext *pDevice, VideoDecodeCreationArgs const& args);
        virtual ~VideoDecode() noexcept;

        // Static capability queries that work from a VIDEO_DECODE_DESC.
        static HRESULT GetVideoDecoderBufferTypeCount(_In_ const VIDEO_DECODE_DESC *pDesc, _Out_ UINT *pBufferTypeCount) noexcept;
        static void GetVideoDecoderBufferInfo(_In_ const VIDEO_DECODE_DESC *pDesc, _In_ UINT Index, _Out_ VIDEO_DECODE_BUFFER_TYPE *pType, _Out_ UINT *pSize, bool IsXbox);
        static void GetVideoDecoderConfigCount(_In_ ID3D12Device *pDevice12, UINT NodeIndex, _In_ const VIDEO_DECODE_DESC *pDesc, _Out_ UINT *pConfigCount);
        static void GetVideoDecoderConfig(_In_ ID3D12Device *pDevice12, UINT NodeIndex, _In_ const VIDEO_DECODE_DESC *pDesc, UINT configIndex, _Out_ VIDEO_DECODE_CONFIG *pConfig, bool IsXbox);

        void DecodeFrame(_In_ const VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *pInputArguments, _In_ const VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS *pOutputArguments);
        // Writes status data into pData, which is dataSize bytes long (per the SAL annotation).
        HRESULT GetDecodingStatus(_Out_writes_bytes_(dataSize) void* pData, UINT dataSize) noexcept;
        // True when the ARRAY_OF_TEXTURES bit is set in the stored decoder-specific configuration word.
        bool IsArrayOfTexturesEnabled() const
        {
            return (m_ConfigDecoderSpecific & VIDEO_DECODE_CONFIG_SPECIFIC_ARRAY_OF_TEXTURES) == VIDEO_DECODE_CONFIG_SPECIFIC_ARRAY_OF_TEXTURES;
        }
        static VIDEO_DECODE_PROFILE_TYPE GetProfileType(_In_ REFGUID DecodeProfile) noexcept;

    protected:
        void ManageResolutionChange(_In_ const VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS *pOutputArguments);
        HRESULT GetDecodeFrameInfo(_Out_ UINT *pWidth, _Out_ UINT *pHeight, _Out_ UINT16 *pMaxDPB) noexcept;
        static VIDEO_DECODE_PROFILE_BIT_DEPTH GetProfileBitDepth(_In_ REFGUID DecodeProfile) noexcept;
        static VIDEO_DECODE_PROFILE_BIT_DEPTH GetFormatBitDepth(DXGI_FORMAT Format) noexcept;
        GUID GetDecodeProfile(VIDEO_DECODE_PROFILE_TYPE ProfileType, VIDEO_DECODE_PROFILE_BIT_DEPTH BitDepth) noexcept;
        void LogPicParams() const;
        void ReleaseUnusedReferences();
        void UpdateCurrPic(_In_ Resource* pTexture2D, UINT subresourceIndex);
        void PrepareForDecodeFrame(_In_ const VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *pInputArguments, _In_ const VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS *pOutputArguments);
        void CachePicParams(_In_ const VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *pInputArguments);

        // Accessors for the buffer held by m_modifiablePicParams: untyped, and typed through a cast.
        void *GetPicParams() { return m_modifiablePicParams.get(); }
        template T *GetPicParams() { return static_cast(GetPicParams());}
        void *GetPicParams() const { return m_modifiablePicParams.get(); }
        template T *GetPicParams() const { return static_cast(GetPicParams());}

        void GetStatusReportFeedbackNumber(_Out_ UINT& statusReportFeedbackNumber, _Out_ DXVA_PicEntry& CurrPic, _Out_ UCHAR& field_pic_flag) noexcept;
        static void GetVideoDecoderSupport(_In_ ID3D12Device *pDevice12, UINT NodeIndex, _In_ const VIDEO_DECODE_DESC *pDesc, _Out_ D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT &decodeSupport);

        // One optional entry per bit-depth index.
        std::optional m_DecodeProfilePerBitDepth[VIDEO_DECODE_PROFILE_BIT_DEPTH_INDEX_MAX];
        unique_comptr m_spVideoDevice;
        std::unique_ptr m_spVideoDecoder;
        D3D12_VIDEO_DECODER_DESC m_decoderDesc = {};
        D3D12_VIDEO_DECODER_HEAP_DESC m_decoderHeapDesc = {};
        D3D12_VIDEO_DECODE_TIER m_tier = D3D12_VIDEO_DECODE_TIER_NOT_SUPPORTED;
        DXGI_FORMAT m_decodeFormat; // No in-class initializer; presumably set by the constructor - confirm.
        D3D12_VIDEO_DECODE_CONFIGURATION_FLAGS m_configurationFlags = D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_NONE;
        // Must stay declared before m_referenceDataManager: that member's initializer reads m_profileType,
        // and members are initialized in declaration order.
        const VIDEO_DECODE_PROFILE_TYPE m_profileType;
        ReferenceDataManager m_referenceDataManager {DeviceChild::m_pParent, m_profileType};
        std::shared_ptr m_spCurrentDecoderHeap;
        std::unique_ptr m_modifiablePicParams;
        UINT m_modifiablePicParamsAllocationSize = 0;
        USHORT m_ConfigDecoderSpecific = 0;
        VideoDecodeStatistics m_decodingStatus;
    };

    // Batched-context wrapper around VideoDecode. Both calls go through FlushBatchAndGetImmediate()
    // (defined elsewhere) and are forwarded to the object it returns.
    class BatchedVideoDecode : public BatchedDeviceChildImpl
    {
    public:
        BatchedVideoDecode(BatchedContext& Context, VideoDecodeCreationArgs const& Args)
            : BatchedDeviceChildImpl(Context, Args)
        {
        }

        void DecodeFrame(_In_ const VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *pInputArguments, _In_ const VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS *pOutputArguments)
        {
            FlushBatchAndGetImmediate().DecodeFrame(pInputArguments, pOutputArguments);
        }
        HRESULT GetDecodingStatus(_Out_writes_bytes_(dataSize) void* pData, UINT dataSize) noexcept
        {
            return FlushBatchAndGetImmediate().GetDecodingStatus(pData, dataSize);
        }
    };
};
================================================ FILE: include/VideoDecodeStatistics.hpp ================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once namespace D3D12TranslationLayer { typedef enum { VIDEO_DECODE_PROFILE_TYPE_NONE, VIDEO_DECODE_PROFILE_TYPE_VC1, VIDEO_DECODE_PROFILE_TYPE_MPEG2, VIDEO_DECODE_PROFILE_TYPE_MPEG4PT2, VIDEO_DECODE_PROFILE_TYPE_H264, VIDEO_DECODE_PROFILE_TYPE_HEVC, VIDEO_DECODE_PROFILE_TYPE_VP9, VIDEO_DECODE_PROFILE_TYPE_VP8, VIDEO_DECODE_PROFILE_TYPE_H264_MVC, VIDEO_DECODE_PROFILE_TYPE_MAX_VALID // Keep at the end to inform static asserts } VIDEO_DECODE_PROFILE_TYPE; typedef struct _DXVA_PicEntry { union { struct { UCHAR Index7Bits : 7; UCHAR AssociatedFlag : 1; }; UCHAR bPicEntry; }; } DXVA_PicEntry; class VideoDecodeStatistics : public DeviceChild { public: VideoDecodeStatistics(ImmediateContext* pDevice); ~VideoDecodeStatistics(); void EndQuery(UINT StatusReportFeedbackNumber, const DXVA_PicEntry& CurrPic, UCHAR field_pic_flag = 0) noexcept; void ReadAvailableData(ID3D12VideoDecoder* pVideoDecoder, VIDEO_DECODE_PROFILE_TYPE profileType, BYTE* pData, SIZE_T DataSize); protected: typedef struct _StatisticsInfo { UINT64 CompletedFenceId = UINT64_MAX; UINT StatusReportFeedbackNumber = 0; DXVA_PicEntry CurrPic = {}; UCHAR field_pic_flag = 0; } StatisticsInfo; static SIZE_T GetResultOffsetForIndex(UINT Index); static SIZE_T GetStatStructSize(VIDEO_DECODE_PROFILE_TYPE profileType); std::vector m_StatisticsInfo; unique_comptr m_spQueryHeap; D3D12ResourceSuballocation m_ResultBuffer; UINT16 m_SubmissionIndex = 0; UINT16 m_ResultCount; }; }; // namespace D3D12TranslationLayer ================================================ FILE: include/VideoDevice.hpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#pragma once

namespace D3D12TranslationLayer
{
    //==================================================================================================================================
    // VideoDevice
    // Stores data responsible for remapping D3D11 video functionality to underlying D3D12 video functionality
    //==================================================================================================================================
    class VideoDevice : public DeviceChild
    {
    public:
        friend class ImmediateContext;
        // Construction runs Initialize() immediately.
        VideoDevice(_In_ ImmediateContext *pDevice)
            : DeviceChild(pDevice)
        {
            Initialize();
        }
        virtual ~VideoDevice() noexcept;

    public:
        // Decoder capability enumeration. Results are returned through the _Out_ parameters.
        void TRANSLATION_API GetVideoDecoderProfileCount(_Out_ UINT *pProfileCount);
        void TRANSLATION_API GetVideoDecoderProfile(_In_ UINT Index, _Out_ GUID *pProfile) ;
        void TRANSLATION_API GetVideoDecoderFormatCount(_In_ const GUID *pDecodeProfile, _Out_ UINT *pFormatCount);
        void TRANSLATION_API GetVideoDecoderFormat(_In_ const GUID *pDecodeProfile, UINT Index, _Out_ DXGI_FORMAT *pFormat);
        void TRANSLATION_API CheckVideoDecoderFormat(_In_ const GUID *pDecodeProfile, _In_ DXGI_FORMAT format, _Out_ BOOL *pSupported);
        void TRANSLATION_API GetVideoDecoderConfigCount(_In_ const VIDEO_DECODE_DESC *pDesc, _Out_ UINT *pCount);
        void TRANSLATION_API GetVideoDecoderConfig(_In_ const VIDEO_DECODE_DESC *pDesc, _In_ UINT Index, _Out_ VIDEO_DECODE_CONFIG *pConfig);
        void TRANSLATION_API GetVideoDecoderBufferTypeCount(_In_ const VIDEO_DECODE_DESC *pDesc, _Out_ UINT *pCount);
        void TRANSLATION_API GetVideoDecoderBufferInfo(_In_ const VIDEO_DECODE_DESC *pDesc, _In_ UINT Index, _Out_ VIDEO_DECODE_BUFFER_TYPE *pType, _Out_ UINT *pSize);
        // pFeatureSupportData is an in/out buffer of FeatureSupportDataSize bytes (per the SAL annotation).
        void TRANSLATION_API CheckFeatureSupport(D3D12_FEATURE_VIDEO FeatureVideo, _Inout_updates_bytes_(FeatureSupportDataSize)void* pFeatureSupportData, UINT FeatureSupportDataSize);

    protected:
        // Non-owning access to the wrapped device pointer.
        ID3D12VideoDevice* GetUnderlyingVideoDevice() noexcept { return m_spVideoDevice.get(); }
        bool IsProfileSupported(REFGUID DecodeProfile) noexcept;

        // A decode profile GUID together with its list of formats.
        struct ProfileInfo {
            GUID profileGUID;
            std::vector formats;
        };
        unique_comptr m_spVideoDevice;
        std::vector m_decodeProfiles;

    private:
        void Initialize();
    };
};
================================================ FILE: include/VideoProcess.hpp ================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include

namespace D3D12TranslationLayer
{
    const UINT MIN_SUPPORTED_INPUT_STREAMS_VIA_EMULATION = 3;

    // A resource plus the subresource subset of it that a video-process operation uses.
    struct VideoProcessView
    {
        Resource* pResource = nullptr;
        CViewSubresourceSubset SubresourceSubset;
    };

    // Rotation and flip settings for one stream; defaults mean "no change".
    struct VIDEO_PROCESS_ORIENTATION_INFO
    {
        D3D12_VIDEO_PROCESS_ORIENTATION Rotation = D3D12_VIDEO_PROCESS_ORIENTATION_DEFAULT;
        BOOL FlipHorizontal = FALSE;
        BOOL FlipVertical = FALSE;
    };

    // Per-stream state that accompanies the D3D12 input-stream structures.
    struct VIDEO_PROCESS_STREAM_INFO
    {
        BOOL StereoFormatSwapViews = FALSE;
        VIDEO_PROCESS_ORIENTATION_INFO OrientationInfo = {};
        BOOL EnableSourceRect = FALSE;
        BOOL EnableDestinationRect = FALSE;
        BOOL ColorSpaceSet = FALSE;
        UINT OutputIndex = 0;
        UINT InputFrameOrField = 0;

        // One resource set per stereo view: the current frame plus past/future reference frames.
        struct {
            VideoProcessView CurrentFrame;
            std::vector PastFrames;
            std::vector FutureFrames;
            std::vector PastSubresources;
            std::vector FutureSubresources;
            std::vector D3D12ResourcePastFrames;
            std::vector D3D12ResourceFutureFrames;
        } ResourceSet[D3D12_VIDEO_PROCESS_STEREO_VIEWS];
    };

    // Input side of a video-process call: three parallel vectors indexed by stream.
    struct VIDEO_PROCESS_INPUT_ARGUMENTS
    {
        // Resizes the three per-stream vectors to NumStreams, resets every D3D12 argument/descriptor
        // entry to the defaults below, and clears the past/future frame lists of every view.
        // Other VIDEO_PROCESS_STREAM_INFO fields of entries that already existed are not touched here.
        void ResetStreams(UINT NumStreams)
        {
            StreamInfo.resize(NumStreams);
            D3D12InputStreamArguments.resize(NumStreams);
            D3D12InputStreamDesc.resize(NumStreams);
            for (DWORD i = 0; i < NumStreams; i++)
            {
                // input stream arguments
                ZeroMemory(&D3D12InputStreamArguments[i].InputStream, sizeof(D3D12InputStreamArguments[i].InputStream));
                ZeroMemory(&D3D12InputStreamArguments[i].Transform, sizeof(D3D12InputStreamArguments[i].Transform));
                D3D12InputStreamArguments[i].Flags = D3D12_VIDEO_PROCESS_INPUT_STREAM_FLAG_NONE;
                ZeroMemory(&D3D12InputStreamArguments[i].RateInfo, sizeof(D3D12InputStreamArguments[i].RateInfo));
                ZeroMemory(&D3D12InputStreamArguments[i].FilterLevels, sizeof(D3D12InputStreamArguments[i].FilterLevels));
                ZeroMemory(&D3D12InputStreamArguments[i].AlphaBlending, sizeof(D3D12InputStreamArguments[i].AlphaBlending));

                // input stream descriptor
                // Defaults: unknown format, 1:1 aspect ratios, 30/1 frame rate, everything optional disabled.
                D3D12InputStreamDesc[i].Format = DXGI_FORMAT_UNKNOWN;
                D3D12InputStreamDesc[i].ColorSpace = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709;
                D3D12InputStreamDesc[i].SourceAspectRatio.Numerator = 1;
                D3D12InputStreamDesc[i].SourceAspectRatio.Denominator = 1;
                D3D12InputStreamDesc[i].DestinationAspectRatio.Numerator = 1;
                D3D12InputStreamDesc[i].DestinationAspectRatio.Denominator = 1;
                D3D12InputStreamDesc[i].FrameRate.Numerator = 30;
                D3D12InputStreamDesc[i].FrameRate.Denominator = 1;
                ZeroMemory(&D3D12InputStreamDesc[i].SourceSizeRange, sizeof(D3D12InputStreamDesc[i].SourceSizeRange));
                ZeroMemory(&D3D12InputStreamDesc[i].DestinationSizeRange, sizeof(D3D12InputStreamDesc[i].DestinationSizeRange));
                D3D12InputStreamDesc[i].EnableOrientation = FALSE;
                D3D12InputStreamDesc[i].FilterFlags = D3D12_VIDEO_PROCESS_FILTER_FLAG_NONE;
                D3D12InputStreamDesc[i].StereoFormat = D3D12_VIDEO_FRAME_STEREO_FORMAT_NONE;
                D3D12InputStreamDesc[i].FieldType = D3D12_VIDEO_FIELD_TYPE_NONE;
                D3D12InputStreamDesc[i].DeinterlaceMode = D3D12_VIDEO_PROCESS_DEINTERLACE_FLAG_NONE;
                D3D12InputStreamDesc[i].EnableAlphaBlending = FALSE;
                ZeroMemory(&D3D12InputStreamDesc[i].LumaKey, sizeof(D3D12InputStreamDesc[i].LumaKey));
                D3D12InputStreamDesc[i].EnableAutoProcessing = FALSE;
                D3D12InputStreamDesc[i].NumPastFrames = 0;
                D3D12InputStreamDesc[i].NumFutureFrames = 0;

                // stream info
                for (DWORD view = 0; view < D3D12_VIDEO_PROCESS_STEREO_VIEWS; view++)
                {
                    StreamInfo[i].ResourceSet[view].PastFrames.clear();
                    StreamInfo[i].ResourceSet[view].FutureFrames.clear();
                }
            }
        }

        // Defined elsewhere.
        void PrepareResources(_In_ UINT stream, _In_ UINT view);
        void PrepareStreamArguments(_In_ UINT stream);
        void PrepareTransform(_In_ UINT stream);
        D3D12_VIDEO_PROCESS_ORIENTATION FinalOrientation(_In_ D3D12_VIDEO_PROCESS_ORIENTATION Rotation, _In_ BOOL FlipHorizontal, _In_ BOOL FlipVertical);
        void TransitionResources(_In_ ImmediateContext *pParent, _In_ UINT stream, _In_ UINT view);

        // Parallel per-stream arrays; ResetStreams keeps them the same length.
        std::vector StreamInfo;
        std::vector D3D12InputStreamArguments;
        std::vector D3D12InputStreamDesc;
    };

    // Output side of a video-process call.
    struct VIDEO_PROCESS_OUTPUT_ARGUMENTS
    {
        // Zeroes the D3D12 output arguments and fills the output descriptor with defaults
        // (unknown format, opaque alpha fill, zero background color, stereo off, 30/1 frame rate).
        VIDEO_PROCESS_OUTPUT_ARGUMENTS()
        {
            // output stream arguments
            ZeroMemory(D3D12OutputStreamArguments.OutputStream, sizeof(D3D12OutputStreamArguments.OutputStream));
            ZeroMemory(&D3D12OutputStreamArguments.TargetRectangle, sizeof(D3D12OutputStreamArguments.TargetRectangle));

            // output stream desc
            D3D12OutputStreamDesc.Format = DXGI_FORMAT_UNKNOWN;
            D3D12OutputStreamDesc.ColorSpace = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709;
            D3D12OutputStreamDesc.AlphaFillMode = D3D12_VIDEO_PROCESS_ALPHA_FILL_MODE_OPAQUE;
            D3D12OutputStreamDesc.AlphaFillModeSourceStreamIndex = 0;
            ZeroMemory(D3D12OutputStreamDesc.BackgroundColor, sizeof(D3D12OutputStreamDesc.BackgroundColor));
            // FrameRate is zeroed here and then set to 30/1 two statements below.
            ZeroMemory(&D3D12OutputStreamDesc.FrameRate, sizeof(D3D12OutputStreamDesc.FrameRate));
            D3D12OutputStreamDesc.EnableStereo = FALSE;
            D3D12OutputStreamDesc.FrameRate.Numerator = 30;
            D3D12OutputStreamDesc.FrameRate.Denominator = 1;
        }

        // Defined elsewhere.
        void PrepareResources();
        void PrepareTransform();
        void TransitionResources(_In_ ImmediateContext *pParent);

        // helpers
        VideoProcessView CurrentFrame[D3D12_VIDEO_PROCESS_STEREO_VIEWS];
        bool EnableTargetRect = false;
        bool ColorSpaceSet = false;
        bool BackgroundColorYCbCr = false;
        bool BackgroundColorSet = false;
        D3D12_VIDEO_PROCESS_OUTPUT_STREAM_ARGUMENTS D3D12OutputStreamArguments;
        D3D12_VIDEO_PROCESS_OUTPUT_STREAM_DESC D3D12OutputStreamDesc;
    };

    // Deinterlacing step owned by a VideoProcess (see m_Deinterlace). Stores the device, the owning
    // VideoProcess and the requested deinterlace mode; the processing itself is defined elsewhere.
    class DeinterlacePrepass
    {
    public:
        DeinterlacePrepass(ImmediateContext* pDevice, class VideoProcess* pVP, D3D12_VIDEO_PROCESS_DEINTERLACE_FLAGS DeinterlaceMode)
            : m_pParent(pDevice)
            , m_pVP(pVP)
            , m_DeinterlaceMode(DeinterlaceMode)
        {
        }

        void Process(_Inout_ VIDEO_PROCESS_INPUT_ARGUMENTS *pInputArguments, UINT NumInputStreams, _In_ VIDEO_PROCESS_OUTPUT_ARGUMENTS *pOutputArguments);

    private:
        void CreatePipelines(DXGI_FORMAT RTVFormat);
        void DoDeinterlace(Resource* pSrc, CViewSubresourceSubset SrcSubset, Resource* pDst, bool bTopFrame);

        using VideoProcessPipelineState = DeviceChildImpl;

        ImmediateContext* const m_pParent;
        VideoProcess* const m_pVP;
        D3D12_VIDEO_PROCESS_DEINTERLACE_FLAGS m_DeinterlaceMode;
        std::vector, 2>> m_spIntermediates;
        std::map> m_spDeinterlacePSOs;
        std::unique_ptr m_spRootSig;
    };

    // Device-child wrapper constructed from an output stream description and an array of
    // NumInputStreamDescs input stream descriptions. pVideoDeviceNoRef: per its name the pointer is
    // presumably borrowed without taking a reference - confirm in the definition.
    class VideoProcessor : public DeviceChildImpl
    {
    public:
        VideoProcessor(ImmediateContext *pContext, ID3D12VideoDevice* pVideoDeviceNoRef, const D3D12_VIDEO_PROCESS_OUTPUT_STREAM_DESC* pOutputStreamDesc, UINT NumInputStreamDescs, const D3D12_VIDEO_PROCESS_INPUT_STREAM_DESC *pInputStreamDescs);
    };

    // Video processing front end. Construction builds the DeinterlacePrepass member and then runs Initialize().
    class VideoProcess : public DeviceChild
    {
    public:
        VideoProcess(_In_ ImmediateContext *pDevice, D3D12_VIDEO_PROCESS_DEINTERLACE_FLAGS DeinterlaceMode)
            : DeviceChild(pDevice)
            , m_Deinterlace(pDevice, this, DeinterlaceMode)
        {
            Initialize();
        }
        virtual ~VideoProcess() noexcept;

        // pInputArguments is annotated as an in/out array of NumInputStreams elements.
        void ProcessFrames(_Inout_updates_(NumInputStreams) VIDEO_PROCESS_INPUT_ARGUMENTS *pInputArguments, _In_ UINT NumInputStreams, _In_ VIDEO_PROCESS_OUTPUT_ARGUMENTS *pOutputArguments);

    private:
        void Initialize();

    protected:
        unique_comptr m_spVideoDevice;
        std::unique_ptr m_spVideoProcessor;
        // Descriptors the current processor was created with (per the member names) - confirm usage
        // in InitializeProcessor.
        std::vector m_creationInputDesc;
        D3D12_VIDEO_PROCESS_OUTPUT_STREAM_DESC m_creationOutputDesc;
        UINT m_driverSupportedMaxInputStreams = 0;
        DeinterlacePrepass m_Deinterlace;

        // Helpers defined elsewhere. The Update* functions report through `updated` whether they changed anything.
        void InitializeProcessor(_Inout_updates_(NumInputStreams) VIDEO_PROCESS_INPUT_ARGUMENTS *pInputArguments, UINT NumInputStreams, _Inout_ VIDEO_PROCESS_OUTPUT_ARGUMENTS *pOutputArguments);
        void UpdateNeededMaxPastFutureFrames(_Inout_ D3D12_VIDEO_PROCESS_INPUT_STREAM_DESC &inputDesc, _In_ const D3D12_VIDEO_PROCESS_OUTPUT_STREAM_DESC &outputDesc);
        void UpdateInputDescriptor(_Inout_updates_(NumInputStreams) VIDEO_PROCESS_INPUT_ARGUMENTS *pInputArguments, UINT NumInputStreams, _In_ VIDEO_PROCESS_OUTPUT_ARGUMENTS *pOutputArguments, _Out_ bool &updated);
        void UpdateOutputDescriptor(_Inout_ VIDEO_PROCESS_OUTPUT_ARGUMENTS *pOutputArguments, _Out_ bool &updated);
        void EmulateVPBlit(_Inout_updates_(NumInputStreams) VIDEO_PROCESS_INPUT_ARGUMENTS *pInputArguments, _In_ UINT NumInputStreams, _In_ VIDEO_PROCESS_OUTPUT_ARGUMENTS *pOutputArguments, _In_ UINT StartStream);
    };

    // Abstract interface for the batched video processor.
    class BatchedVideoProcess
    {
    public:
        virtual ~BatchedVideoProcess() {}
        virtual void ProcessFrames(_Inout_updates_(NumInputStreams) VIDEO_PROCESS_INPUT_ARGUMENTS* pInputArguments, _In_ UINT NumInputStreams, _In_ VIDEO_PROCESS_OUTPUT_ARGUMENTS* pOutputArguments) = 0;
    };

    // Implementation of BatchedVideoProcess: ProcessFrames goes through FlushBatchAndGetImmediate()
    // (defined elsewhere) and forwards the call to the object it returns.
    class BatchedVideoProcessImpl : public BatchedDeviceChildImpl, public BatchedVideoProcess
    {
    public:
        BatchedVideoProcessImpl(BatchedContext& Context, D3D12_VIDEO_PROCESS_DEINTERLACE_FLAGS DeinterlaceMode)
            : BatchedDeviceChildImpl(Context, DeinterlaceMode)
        {
        }
        void ProcessFrames(_Inout_updates_(NumInputStreams) VIDEO_PROCESS_INPUT_ARGUMENTS *pInputArguments, _In_ UINT NumInputStreams, _In_ VIDEO_PROCESS_OUTPUT_ARGUMENTS *pOutputArguments) override
        {
            FlushBatchAndGetImmediate().ProcessFrames(pInputArguments, NumInputStreams, pOutputArguments);
        }
    };
};
================================================ FILE: include/VideoProcessEnum.hpp ================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once namespace D3D12TranslationLayer { struct VIDEO_PROCESS_ENUM_ARGS { D3D12_VIDEO_FIELD_TYPE InputFieldType = D3D12_VIDEO_FIELD_TYPE_NONE; DXGI_RATIONAL InputFrameRate = {}; UINT InputWidth = 0; UINT InputHeight = 0; DXGI_RATIONAL OutputFrameRate = {}; UINT OutputWidth = 0; UINT OutputHeight = 0; UINT MaxInputStreams = 0; }; typedef enum { VIDEO_PROCESS_CONVERSION_CAPS_NONE = 0x0, VIDEO_PROCESS_CONVERSION_CAPS_LINEAR_SPACE = 0x1, VIDEO_PROCESS_CONVERSION_CAPS_xvYCC = 0x2, VIDEO_PROCESS_CONVERSION_CAPS_RGB_RANGE_CONVERSION = 0x4, VIDEO_PROCESS_CONVERSION_CAPS_YCbCr_MATRIX_CONVERSION = 0x8, VIDEO_PROCESS_CONVERSION_CAPS_NOMINAL_RANGE = 0x10 } VIDEO_PROCESS_CONVERSION_CAPS; typedef struct { D3D12_FEATURE_DATA_VIDEO_PROCESS_SUPPORT dx12Support; VIDEO_PROCESS_CONVERSION_CAPS colorConversionCaps; } VIDEO_PROCESS_SUPPORT; typedef struct { DXGI_RATIONAL Input; DXGI_RATIONAL Output; } FrameRatePair; inline bool GetPow2ScaleExponentFromMax(UINT Size, UINT Max, UINT Min, UINT& Exp) { UINT CurrentSize = Max; for (Exp = 0; CurrentSize > Min; CurrentSize = (CurrentSize + 1) / 2, Exp++) { if (CurrentSize == Size) { return true; } } return (CurrentSize == Size); } inline bool IsScaleSupported(const D3D12_VIDEO_SCALE_SUPPORT& ScaleSupport, UINT OutputWidth, UINT OutputHeight) { bool fSupported = OutputWidth != 0 && OutputWidth <= ScaleSupport.OutputSizeRange.MaxWidth && OutputWidth >= ScaleSupport.OutputSizeRange.MinWidth && OutputHeight != 0 && OutputHeight <= ScaleSupport.OutputSizeRange.MaxHeight && OutputHeight >= ScaleSupport.OutputSizeRange.MinHeight; if (fSupported) { if ((ScaleSupport.Flags & D3D12_VIDEO_SCALE_SUPPORT_FLAG_POW2_ONLY) != 0) { UINT Pow2ScaleExpX, Pow2ScaleExpY; fSupported = GetPow2ScaleExponentFromMax(OutputWidth, ScaleSupport.OutputSizeRange.MaxWidth, ScaleSupport.OutputSizeRange.MinWidth, Pow2ScaleExpX) && GetPow2ScaleExponentFromMax(OutputHeight, ScaleSupport.OutputSizeRange.MaxHeight, ScaleSupport.OutputSizeRange.MinHeight, Pow2ScaleExpY) && 
Pow2ScaleExpX == Pow2ScaleExpY; } else if ((ScaleSupport.Flags & D3D12_VIDEO_SCALE_SUPPORT_FLAG_EVEN_DIMENSIONS_ONLY) != 0) { fSupported = (OutputWidth & 1) == 0 && (OutputHeight & 1) == 0; } } return fSupported; } struct ReferenceInfo { UINT pastFrames = 0; UINT futureFrames = 0; bool frameRateConversionSupported = false; }; class VideoProcessEnum : public DeviceChild { public: VideoProcessEnum(_In_ ImmediateContext *pDevice) noexcept : DeviceChild(pDevice) {} virtual ~VideoProcessEnum() noexcept {} void Initialize(); void CheckFeatureSupport(D3D12_FEATURE_VIDEO FeatureVideo, _Inout_updates_bytes_(FeatureSupportDataSize)void* pFeatureSupportData, UINT FeatureSupportDataSize); const std::vector &GetVPCapsSupportTuples() const { return m_vpCapsSupportTuples; } const std::vector & GetVPInputFormats() const { return m_vpInputFormats; } const std::vector & GetVPOutputFormats() const { return m_vpOutputFormats; } UINT GetVPInputFormatCount() { return (UINT)m_vpInputFormats.size(); } UINT GetVPOutputFormatCount() { return (UINT)m_vpOutputFormats.size(); } virtual void CacheVideoProcessInfo(_In_ VIDEO_PROCESS_ENUM_ARGS &args); ReferenceInfo UpdateReferenceInfo(D3D12_VIDEO_PROCESS_DEINTERLACE_FLAGS DeinterlaceSupport); D3D12_VIDEO_PROCESS_DEINTERLACE_FLAGS GetDeinterlaceSupport() { return m_deinterlaceFlags; } bool IsAutoProcessingSupported() { return m_autoprocessingSupported; } protected: bool IsSupported(_In_ const D3D12_FEATURE_DATA_VIDEO_PROCESS_SUPPORT &dx12Support, UINT OutputWidth, UINT OutputHeight); void UpdateReferenceInfo(_In_ D3D12_FEATURE_DATA_VIDEO_PROCESS_REFERENCE_INFO &referenceInfo, _In_ DXGI_RATIONAL &inputFrameRate, _In_ DXGI_RATIONAL &outputFrameRate, _Inout_ UINT &pastFrames, _Inout_ UINT &futureFrames); unique_comptr m_spVideoDevice; std::vector m_vpCapsSupportTuples; std::vector m_vpInputFormats; std::vector m_vpOutputFormats; D3D12_VIDEO_PROCESS_DEINTERLACE_FLAGS m_deinterlaceFlags = D3D12_VIDEO_PROCESS_DEINTERLACE_FLAG_NONE; bool 
m_autoprocessingSupported = false; }; }; ================================================ FILE: include/VideoProcessShaders.h ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #if 0 // // Generated by Microsoft (R) HLSL Shader Compiler 10.1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_VertexID 0 x 0 VERTID uint x // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Position 0 xyzw 0 POS float xyzw // vs_5_1 dcl_globalFlags refactoringAllowed dcl_immediateConstantBuffer { { -1.000000, 1.000000, 0.500000, 1.000000}, { 1.000000, 1.000000, 0.500000, 1.000000}, { -1.000000, -1.000000, 0.500000, 1.000000}, { 1.000000, -1.000000, 0.500000, 1.000000} } dcl_input_sgv v0.x, vertex_id dcl_output_siv o0.xyzw, position dcl_temps 1 mov r0.x, v0.x mov o0.xy, icb[r0.x + 0].xyxx mov o0.zw, l(0,0,0.500000,1.000000) ret // Approximately 4 instruction slots used #endif const BYTE g_DeinterlaceVS[] = { 68, 88, 66, 67, 16, 120, 59, 165, 100, 158, 46, 103, 129, 187, 164, 137, 2, 22, 121, 145, 1, 0, 0, 0, 224, 2, 0, 0, 6, 0, 0, 0, 56, 0, 0, 0, 164, 0, 0, 0, 216, 0, 0, 0, 12, 1, 0, 0, 224, 1, 0, 0, 124, 2, 0, 0, 82, 68, 69, 70, 100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 0, 0, 0, 1, 5, 254, 255, 0, 5, 0, 0, 60, 0, 0, 0, 19, 19, 68, 37, 60, 0, 0, 0, 24, 0, 0, 0, 40, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 73, 83, 71, 78, 44, 0, 0, 0, 1, 0, 0, 0, 8, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 83, 86, 95, 86, 101, 114, 116, 101, 120, 73, 68, 0, 79, 83, 71, 78, 44, 0, 0, 
0, 1, 0, 0, 0, 8, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 83, 86, 95, 80, 111, 115, 105, 116, 105, 111, 110, 0, 83, 72, 69, 88, 204, 0, 0, 0, 81, 0, 1, 0, 51, 0, 0, 0, 106, 8, 0, 1, 53, 24, 0, 0, 18, 0, 0, 0, 0, 0, 128, 191, 0, 0, 128, 63, 0, 0, 0, 63, 0, 0, 128, 63, 0, 0, 128, 63, 0, 0, 128, 63, 0, 0, 0, 63, 0, 0, 128, 63, 0, 0, 128, 191, 0, 0, 128, 191, 0, 0, 0, 63, 0, 0, 128, 63, 0, 0, 128, 63, 0, 0, 128, 191, 0, 0, 0, 63, 0, 0, 128, 63, 96, 0, 0, 4, 18, 16, 16, 0, 0, 0, 0, 0, 6, 0, 0, 0, 103, 0, 0, 4, 242, 32, 16, 0, 0, 0, 0, 0, 1, 0, 0, 0, 104, 0, 0, 2, 1, 0, 0, 0, 54, 0, 0, 5, 18, 0, 16, 0, 0, 0, 0, 0, 10, 16, 16, 0, 0, 0, 0, 0, 54, 0, 0, 6, 50, 32, 16, 0, 0, 0, 0, 0, 70, 144, 144, 0, 10, 0, 16, 0, 0, 0, 0, 0, 54, 0, 0, 8, 194, 32, 16, 0, 0, 0, 0, 0, 2, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 0, 128, 63, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 84, 83, 48, 92, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 92, 0, 0, 0, 30, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255 }; #if 0 // // Generated by Microsoft (R) HLSL Shader Compiler 10.1 // // // Buffer Definitions: // // cbuffer DeinterlaceConstants // { // // uint topFrame; // Offset: 0 Size: 4 // // } // // // Resource Bindings: // // Name Type Format Dim ID HLSL Bind Count // ------------------------------ ---------- ------- 
----------- ------- -------------- ------ // inputTexture texture uint4 2darray T0 t0 1 // DeinterlaceConstants cbuffer NA NA CB0 cb0 1 // // // // Input signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Position 0 xyzw 0 POS float xy // // // Output signature: // // Name Index Mask Register SysValue Format Used // -------------------- ----- ------ -------- -------- ------- ------ // SV_Target 0 xyzw 0 TARGET uint xyzw // ps_5_1 dcl_globalFlags refactoringAllowed dcl_constantbuffer CB0[0:0][1], immediateIndexed, space=0 dcl_resource_texture2darray (uint,uint,uint,uint) T0[0:0], space=0 dcl_input_ps_siv linear noperspective v0.xy, position dcl_output o0.xyzw dcl_temps 2 ftou r0.xy, v0.xyxx and r1.x, r0.y, l(1) imad r0.y, -r1.x, CB0[0][0].x, r0.y and r1.x, r0.y, l(1) iadd r1.x, -r1.x, l(1) iadd r1.y, -CB0[0][0].x, l(1) imad r0.z, r1.x, r1.y, r0.y mov r0.w, l(0) ld o0.xyzw, r0.xzww, T0[0].xyzw ret // Approximately 10 instruction slots used #endif const BYTE g_DeinterlacePS[] = { 68, 88, 66, 67, 105, 75, 91, 140, 133, 192, 76, 229, 198, 242, 181, 42, 95, 246, 129, 48, 1, 0, 0, 0, 132, 4, 0, 0, 6, 0, 0, 0, 56, 0, 0, 0, 140, 1, 0, 0, 192, 1, 0, 0, 244, 1, 0, 0, 132, 3, 0, 0, 32, 4, 0, 0, 82, 68, 69, 70, 76, 1, 0, 0, 1, 0, 0, 0, 176, 0, 0, 0, 2, 0, 0, 0, 60, 0, 0, 0, 1, 5, 255, 255, 0, 5, 0, 0, 36, 1, 0, 0, 19, 19, 68, 37, 60, 0, 0, 0, 24, 0, 0, 0, 40, 0, 0, 0, 40, 0, 0, 0, 36, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 140, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 1, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 153, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 105, 110, 112, 117, 116, 84, 101, 120, 116, 117, 114, 101, 0, 68, 101, 105, 110, 116, 101, 114, 108, 97, 99, 101, 67, 111, 110, 115, 116, 97, 110, 116, 115, 0, 171, 171, 153, 0, 0, 0, 1, 0, 0, 0, 200, 0, 0, 0, 16, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 240, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 116, 111, 112, 70, 114, 97, 109, 101, 0, 100, 119, 111, 114, 100, 0, 171, 0, 0, 19, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249, 0, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 72, 76, 83, 76, 32, 83, 104, 97, 100, 101, 114, 32, 67, 111, 109, 112, 105, 108, 101, 114, 32, 49, 48, 46, 49, 0, 73, 83, 71, 78, 44, 0, 0, 0, 1, 0, 0, 0, 8, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 15, 3, 0, 0, 83, 86, 95, 80, 111, 115, 105, 116, 105, 111, 110, 0, 79, 83, 71, 78, 44, 0, 0, 0, 1, 0, 0, 0, 8, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 83, 86, 95, 84, 97, 114, 103, 101, 116, 0, 171, 171, 83, 72, 69, 88, 136, 1, 0, 0, 81, 0, 0, 0, 98, 0, 0, 0, 106, 8, 0, 1, 89, 0, 0, 7, 70, 142, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 88, 64, 0, 7, 70, 126, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68, 68, 0, 0, 0, 0, 0, 0, 100, 32, 0, 4, 50, 16, 16, 0, 0, 0, 0, 0, 1, 0, 0, 0, 101, 0, 0, 3, 242, 32, 16, 0, 0, 0, 0, 0, 104, 0, 0, 2, 2, 0, 0, 0, 28, 0, 0, 5, 50, 0, 16, 0, 0, 0, 0, 0, 70, 16, 16, 0, 0, 0, 0, 0, 1, 0, 0, 7, 18, 0, 16, 0, 1, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 1, 0, 0, 0, 35, 0, 0, 12, 34, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 128, 65, 0, 0, 0, 1, 0, 0, 0, 10, 128, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 0, 0, 7, 18, 0, 16, 0, 1, 0, 0, 0, 26, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 1, 0, 0, 0, 30, 0, 0, 8, 18, 0, 16, 0, 1, 0, 0, 0, 10, 0, 16, 128, 65, 0, 0, 0, 1, 0, 0, 0, 1, 64, 0, 0, 1, 0, 0, 0, 30, 0, 0, 10, 34, 0, 16, 0, 1, 0, 0, 0, 10, 128, 48, 128, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 64, 0, 0, 1, 0, 0, 0, 35, 0, 0, 9, 66, 0, 16, 0, 0, 0, 0, 0, 10, 0, 16, 0, 1, 0, 0, 0, 26, 0, 16, 0, 1, 0, 0, 0, 26, 0, 16, 0, 0, 
0, 0, 0, 54, 0, 0, 5, 130, 0, 16, 0, 0, 0, 0, 0, 1, 64, 0, 0, 0, 0, 0, 0, 45, 0, 0, 8, 242, 32, 16, 0, 0, 0, 0, 0, 134, 15, 16, 0, 0, 0, 0, 0, 70, 126, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 1, 83, 84, 65, 84, 148, 0, 0, 0, 10, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 84, 83, 48, 92, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 92, 0, 0, 0, 30, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255 }; ================================================ FILE: include/VideoReferenceDataManager.hpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#pragma once namespace D3D12TranslationLayer { constexpr UINT16 DXVA_INVALID_PICTURE_INDEX = 0xFFFF; constexpr UINT16 HEVC_INVALID_PICTURE_INDEX = 0x7F; constexpr UINT16 H264_INVALID_PICTURE_INDEX = 0x7F; constexpr UINT16 VPX_INVALID_PICTURE_INDEX = 0x7F; class VideoDecoder : public DeviceChildImpl { public: VideoDecoder(ImmediateContext *pContext, ID3D12VideoDevice* pVideoDeviceNoRef, const D3D12_VIDEO_DECODER_DESC& desc); D3D12_VIDEO_DECODER_DESC GetDesc() { return GetForImmediateUse()->GetDesc(); } }; class VideoDecoderHeap : public DeviceChildImpl { public: VideoDecoderHeap(ImmediateContext *pContext, ID3D12VideoDevice* pVideoDeviceNoRef, const D3D12_VIDEO_DECODER_HEAP_DESC& desc); D3D12_VIDEO_DECODER_HEAP_DESC GetDesc() { return GetForImmediateUse()->GetDesc(); } }; struct ReferenceOnlyDesc { DXGI_FORMAT Format = DXGI_FORMAT_UNKNOWN; UINT64 Width = 0; UINT Height = 0; }; struct ReferenceDataManager { ReferenceDataManager( _In_ ImmediateContext *pImmediateContext, VIDEO_DECODE_PROFILE_TYPE profileType); UINT Size() const { return (UINT)textures.size(); } bool IsReferenceOnly() { return m_fReferenceOnly; } void Resize(UINT16 dbp, _In_opt_ ReferenceOnlyDesc* pReferenceOnly, bool fArrayOfTexture); void ResetInternalTrackingReferenceUsage(); void ResetReferenceFramesInformation(); template void MarkReferencesInUse(const T (&picEntries)[size]); void MarkReferenceInUse(UINT16 index); void ReleaseUnusedReferences(); UINT16 StoreFutureReference(UINT16 index, _In_ std::shared_ptr& decoderHeap, Resource* pTexture2D, UINT subresourceIndex); template void UpdateEntries(T (&picEntries)[size]); UINT16 UpdateEntry(UINT16 index); template void GetUpdatedEntries(T (&picEntries)[size]); UINT16 GetUpdatedEntry(UINT16 index); void TransitionReferenceOnlyOutput(_Out_ ID3D12Resource*& pOutputReferenceNoRef, _Out_ UINT& OutputSubresource); // D3D12 DecodeFrame Parameters. 
std::vector textures; std::vector texturesSubresources; std::vector decoderHeapsParameter; protected: struct ReferenceData { std::shared_ptr decoderHeap; unique_comptr referenceOnlyTexture; // Allocated and lifetime managed by translation layer Resource* referenceTexture; // May point to caller allocated resource or referenceOnlyTexture UINT subresourceIndex; UINT16 originalIndex; bool fUsed; }; void TransitionReference(_In_ ReferenceData& referenceData, D3D12_RESOURCE_STATES decodeState); void ResizeDataStructures(UINT size); UINT16 FindRemappedIndex(UINT16 originalIndex); std::vector referenceDatas; ImmediateContext* m_pImmediateContext; UINT16 m_invalidIndex; UINT16 m_currentOutputIndex = 0; bool m_fReferenceOnly = false; bool m_fArrayOfTexture = false; }; //---------------------------------------------------------------------------------------------------------------------------------- template inline void ReferenceDataManager::UpdateEntries(T (&picEntries)[size]) { for (auto& picEntry : picEntries) { picEntry.Index7Bits = UpdateEntry(picEntry.Index7Bits); } } //---------------------------------------------------------------------------------------------------------------------------------- template inline void ReferenceDataManager::GetUpdatedEntries(T (&picEntries)[size]) { for (auto& picEntry : picEntries) { picEntry.Index7Bits = GetUpdatedEntry(picEntry.Index7Bits); } } //---------------------------------------------------------------------------------------------------------------------------------- template inline void ReferenceDataManager::MarkReferencesInUse(const T (&picEntries)[size]) { for (auto& picEntry : picEntries) { MarkReferenceInUse(picEntry.Index7Bits); } } }; ================================================ FILE: include/VideoViewHelper.hpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#pragma once

namespace D3D12TranslationLayer
{
    // Translation-layer description of a video decoder output view: a format and one array slice.
    struct VIDEO_DECODER_OUTPUT_VIEW_DESC_INTERNAL
    {
        DXGI_FORMAT Format;
        UINT ArraySlice;
    };

    // Translation-layer description of a video processor input view: a format, one mip and one array slice.
    struct VIDEO_PROCESSOR_INPUT_VIEW_DESC_INTERNAL
    {
        DXGI_FORMAT Format; // TODO: verify FourCC usage when doing VP work
        UINT MipSlice;
        UINT ArraySlice;
    };

    // Translation-layer description of a video processor output view: a format, one mip and an array range.
    struct VIDEO_PROCESSOR_OUTPUT_VIEW_DESC_INTERNAL
    {
        DXGI_FORMAT Format;
        UINT MipSlice;
        UINT FirstArraySlice;
        UINT ArraySize;
    };
};

================================================
FILE: include/View.hpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once

namespace D3D12TranslationLayer
{
    class Resource;

    //==================================================================================================================================
    // View
    // Stores data responsible for remapping D3D11 views to underlying D3D12 views
    //==================================================================================================================================

    // Per-view binding bookkeeping: one bitset of bound slots per stage (m_BindPoints) plus an
    // intrusive list entry (m_ViewBindingList). The members are private; the friend classes
    // declared below are the ones that touch them directly.
    // NOTE(review): the template parameter list (and the bitset size argument) appear to be
    // missing from this copy of the file; NumStages is used below but never declared. Confirm
    // against the upstream header.
    template class CViewBindingsImpl
    {
    public:
        // Starts with an empty (self-linked) list entry, which IsViewBound() reports as "not bound".
        CViewBindingsImpl() { D3D12TranslationLayer::InitializeListHead(&m_ViewBindingList); }

        // Unlinks the entry on destruction if it is still part of a list.
        ~CViewBindingsImpl() { if (IsViewBound()) { D3D12TranslationLayer::RemoveEntryList(&m_ViewBindingList); } }

        // True while m_ViewBindingList is linked into some list.
        bool IsViewBound() { return !D3D12TranslationLayer::IsListEmpty(&m_ViewBindingList); }

        // Records that the view occupies `slot` of `stage`.
        void ViewBound(UINT stage, UINT slot) { m_BindPoints[stage].set(slot); }

        // Clears the record for `slot` of `stage`; asserts that the slot was marked as bound.
        void ViewUnbound(UINT stage, UINT slot) { assert(m_BindPoints[stage][slot]); m_BindPoints[stage].set(slot, false); }

    private:
        friend class CResourceBindings;
        friend class ImmediateContext;
        friend class Resource;

        std::bitset m_BindPoints[NumStages];
        LIST_ENTRY m_ViewBindingList;
    };

    // These types are purely used to specialize the templated
    // view class
    enum class ShaderResourceViewType {};
    enum class RenderTargetViewType {};
    enum class DepthStencilViewType {};
    enum class UnorderedAccessViewType {};
    enum class VideoDecoderOutputViewType {};
    enum class VideoProcessorInputViewType {};
    enum class VideoProcessorOutputViewType {};

    // Traits class mapping a view tag type to its descriptor types; specialized by the macros below.
    template< class TIface >
    struct CViewMapper;

    // D3D12 UAV description paired with the D3D11 UAV flags it was created from.
    struct D3D12_UNORDERED_ACCESS_VIEW_DESC_WRAPPER
    {
        D3D12_UNORDERED_ACCESS_VIEW_DESC m_Desc12;
        UINT m_D3D11UAVFlags;
    };

    // Specializes CViewMapper for a view kind that has an ID3D12Device::Create<View> method:
    // exposes the translation-layer desc type, the D3D12 desc type, and a pointer to that method.
#define DECLARE_VIEW_MAPPER(View, DescType12, TranslationLayerDesc) \
    template<> struct CViewMapper<##View##Type> \
    { \
        typedef TranslationLayerDesc TTranslationLayerDesc; \
        typedef D3D12_##DescType12 TDesc12; \
        static decltype(&ID3D12Device::Create##View) GetCreate() { return &ID3D12Device::Create##View; } \
    }

    // Variant for view kinds without a device Create method: only the two typedefs are provided.
#define DECLARE_VIEW_MAPPER1(View, DescType, TranslationLayerDesc) \
    template<> struct CViewMapper<##View##Type> \
    { \
        typedef TranslationLayerDesc TTranslationLayerDesc; \
        typedef DescType TDesc12; \
    }

    DECLARE_VIEW_MAPPER(ShaderResourceView, SHADER_RESOURCE_VIEW_DESC, D3D12_SHADER_RESOURCE_VIEW_DESC);
    DECLARE_VIEW_MAPPER(RenderTargetView, RENDER_TARGET_VIEW_DESC, D3D12_RENDER_TARGET_VIEW_DESC);
    DECLARE_VIEW_MAPPER(DepthStencilView, DEPTH_STENCIL_VIEW_DESC, D3D12_DEPTH_STENCIL_VIEW_DESC);
    DECLARE_VIEW_MAPPER(UnorderedAccessView, UNORDERED_ACCESS_VIEW_DESC, D3D12_UNORDERED_ACCESS_VIEW_DESC_WRAPPER);
    DECLARE_VIEW_MAPPER1(VideoDecoderOutputView, VIDEO_DECODER_OUTPUT_VIEW_DESC_INTERNAL, VIDEO_DECODER_OUTPUT_VIEW_DESC_INTERNAL);
    DECLARE_VIEW_MAPPER1(VideoProcessorInputView, VIDEO_PROCESSOR_INPUT_VIEW_DESC_INTERNAL, VIDEO_PROCESSOR_INPUT_VIEW_DESC_INTERNAL);
    DECLARE_VIEW_MAPPER1(VideoProcessorOutputView, VIDEO_PROCESSOR_OUTPUT_VIEW_DESC_INTERNAL, VIDEO_PROCESSOR_OUTPUT_VIEW_DESC_INTERNAL);
    // NOTE(review): only DECLARE_VIEW_MAPPER is undefined here; DECLARE_VIEW_MAPPER1 stays defined
    // for every translation unit that includes this header. Confirm whether that is intentional.
#undef DECLARE_VIEW_MAPPER

    // Selects the CViewBindingsImpl instantiation used by each view kind; the primary template
    // uses a single stage with a single slot.
    // NOTE(review): the specialization arguments below appear to have been lost in this copy of
    // the file (three specializations read identically) - confirm against the upstream header.
    template struct CViewBindingsMapper { using Type = CViewBindingsImpl<1, 1>; };
    template <> struct CViewBindingsMapper { using Type = CViewBindingsImpl; };
    template <> struct CViewBindingsMapper { using Type = CViewBindingsImpl<1, D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT>; };
    template <> struct CViewBindingsMapper { using Type = CViewBindingsImpl; };
    template using CViewBindings = typename CViewBindingsMapper::Type;
// Non-templated base shared by every view type: the viewed resource, the descriptor slot,
// the subresource subset covered by the view, and the resource "uniqueness" value the
// descriptor was last written for.
class ViewBase : public DeviceChild
{
public: // Methods
    ViewBase(ImmediateContext* pDevice, Resource* pResource, CViewSubresourceSubset const& Subresources) noexcept;

    // Note: This is hiding the base class implementation not overriding it
    // Warning: this method is hidden in the UAV type, and is not virtual
    // Always ensure that this method is called on the most derived type.
    void UsedInCommandList(COMMAND_LIST_TYPE commandListType, UINT64 id);

public: // Members
    Resource* const m_pResource;

protected:
    D3D12_CPU_DESCRIPTOR_HANDLE m_Descriptor;
    UINT m_DescriptorHeapIndex;

public:
    CViewSubresourceSubset m_subresources;
    UINT m_ViewUniqueness;
};

// View of a particular kind, selected by the tag type TIface. Holds the D3D12 description and
// rewrites the descriptor on demand (see RefreshUnderlying).
// NOTE(review): several template argument lists in this class (e.g. on CViewMapper and
// CViewBindings) appear to be missing from this copy of the file - confirm against upstream.
template< class TIface >
class View : public ViewBase
{
public: // Types
    typedef CViewMapper TMapper;
    typedef typename CViewMapper::TDesc12 TDesc12;
    typedef typename CViewMapper::TTranslationLayerDesc TTranslationLayerDesc;

    // Null-tolerant adapters that forward to ViewBound / ViewUnbound.
    struct TBinder
    {
        static void Bound(View* pView, UINT slot, EShaderStage stage) { if (pView) pView->ViewBound(slot, stage); }
        static void Unbound(View* pView, UINT slot, EShaderStage stage) { if (pView) pView->ViewUnbound(slot, stage); }
    };

public: // Methods
    // Heap-allocates a view; pair with DestroyView.
    static View *CreateView(ImmediateContext* pDevice, const typename TDesc12 &Desc, Resource &ViewResource) noexcept(false) { return new View(pDevice, Desc, ViewResource); }
    static void DestroyView(View* pView) { delete pView; }

    View(ImmediateContext* pDevice, const typename TDesc12 &Desc, Resource &ViewResource) noexcept(false);
    ~View() noexcept;

    const TDesc12& GetDesc12() noexcept;

    bool IsUpToDate() const noexcept;
    HRESULT RefreshUnderlying() noexcept;

    // Refreshes the descriptor and returns its CPU handle; a failing HRESULT is rethrown via ThrowFailure.
    D3D12_CPU_DESCRIPTOR_HANDLE GetRefreshedDescriptorHandle()
    {
        HRESULT hr = RefreshUnderlying();

        if (FAILED(hr))
        {
            assert(hr != E_INVALIDARG);
            ThrowFailure(hr);
        }

        return m_Descriptor;
    }

    void ViewBound(UINT Slot = 0, EShaderStage = e_PS) noexcept;
    void ViewUnbound(UINT Slot = 0, EShaderStage = e_PS) noexcept;

    // Simple bind reference counter; decrementing asserts that the count is positive.
    UINT16 GetBindRefs() { return m_BindRefs; }
    void IncrementBindRefs() { m_BindRefs++; }
    void DecrementBindRefs()
    {
        assert(m_BindRefs > 0);
        m_BindRefs--;
    }

public:
    CViewBindings m_currentBindings;

private:
    UINT16 m_BindRefs;
    TDesc12 m_Desc;

    // We tamper with m_Desc.Buffer.FirstElement when renaming resources for map discard so it is important that we record the
    // original first element expected by the API
    UINT64 APIFirstElement;

    void UpdateMinLOD(float MinLOD);
};

// Short aliases for the view instantiations.
// NOTE(review): the tag-type arguments appear to have been stripped from these typedefs in this
// copy of the file - confirm against upstream.
typedef View TSRV;
typedef View TRTV;
typedef View TDSV;
typedef View TUAV;
typedef View TVDOV;
typedef View TVPIV;
typedef View TVPOV;

// Counter and Append UAVs have an additional resource allocated
// to hold the counter value
class UAV : public TUAV
{
public:
    UAV(ImmediateContext* pDevice, const TTranslationLayerDesc &Desc, Resource &ViewResource) noexcept(false);
    ~UAV() noexcept(false);

    //Note: This is hiding the base class implementation not overriding it
    void UsedInCommandList(COMMAND_LIST_TYPE commandListType, UINT64 id)
    {
        TUAV::UsedInCommandList(commandListType, id);
        DeviceChild::UsedInCommandList(commandListType, id);
    }

    // Heap-allocates a UAV from the translation-layer description.
    static UAV *CreateView(ImmediateContext* pDevice, const TTranslationLayerDesc &Desc, Resource &ViewResource) noexcept(false) { return new UAV(pDevice, Desc, ViewResource); }

    void EnsureCounterResource() noexcept(false);

    void UpdateCounterValue(UINT Value);

    void CopyCounterToBuffer(ID3D12Resource* pDst, UINT DstOffset) noexcept;

public:
    UINT m_D3D11UAVFlags;
    D3D12TranslationLayer::unique_comptr m_pCounterResource;
};

class CDescriptorHeapManager;

// Plain record pairing a descriptor slot with its owning heap manager and the ID of the last
// command list that used it.
// NOTE(review): the constructor's initializer list is not in member declaration order
// (m_Descriptor is declared first); members are still initialized in declaration order.
struct DescriptorHeapEntry
{
    DescriptorHeapEntry(CDescriptorHeapManager *pDescriptorHeapManager, D3D12_CPU_DESCRIPTOR_HANDLE Descriptor, UINT DescriptorHeapIndex, UINT64 LastUsedCommandListID) :
        m_pDescriptorHeapManager(pDescriptorHeapManager),
        m_Descriptor(Descriptor),
        m_DescriptorHeapIndex(DescriptorHeapIndex),
        m_LastUsedCommandListID(LastUsedCommandListID) {}

    D3D12_CPU_DESCRIPTOR_HANDLE m_Descriptor;
    CDescriptorHeapManager *m_pDescriptorHeapManager;
    UINT m_DescriptorHeapIndex;
    UINT64 m_LastUsedCommandListID;
};

typedef TSRV SRV;
typedef TRTV RTV;
typedef TDSV DSV;
typedef TVDOV VDOV;
typedef TVPIV VPIV;
typedef TVPOV VPOV;
};

================================================
FILE: include/View.inl
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once

namespace D3D12TranslationLayer
{
    // Specialization that writes MinLOD into the ResourceMinLODClamp field matching the view
    // dimension. Buffer and multisampled dimensions are left untouched.
    // NOTE(review): the specialization argument on View appears to be missing in this copy; the
    // D3D12_SRV_DIMENSION_* cases suggest it is the shader-resource view - confirm against upstream.
    template<>
    inline void View::UpdateMinLOD(float MinLOD)
    {
        switch (m_Desc.ViewDimension)
        {
            case D3D12_SRV_DIMENSION_BUFFER:
            case D3D12_SRV_DIMENSION_TEXTURE2DMS:
            case D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY:
                break;
            case D3D12_SRV_DIMENSION_TEXTURE1D:
                m_Desc.Texture1D.ResourceMinLODClamp = MinLOD;
                break;
            case D3D12_SRV_DIMENSION_TEXTURE1DARRAY:
                m_Desc.Texture1DArray.ResourceMinLODClamp = MinLOD;
                break;
            case D3D12_SRV_DIMENSION_TEXTURE2D:
                m_Desc.Texture2D.ResourceMinLODClamp = MinLOD;
                break;
            case D3D12_SRV_DIMENSION_TEXTURE2DARRAY:
                m_Desc.Texture2DArray.ResourceMinLODClamp = MinLOD;
                break;
            case D3D12_SRV_DIMENSION_TEXTURE3D:
                m_Desc.Texture3D.ResourceMinLODClamp = MinLOD;
                break;
            case D3D12_SRV_DIMENSION_TEXTURECUBE:
                m_Desc.TextureCube.ResourceMinLODClamp = MinLOD;
                break;
            case D3D12_SRV_DIMENSION_TEXTURECUBEARRAY:
                m_Desc.TextureCubeArray.ResourceMinLODClamp = MinLOD;
                break;
        }
    }

    // Generic version: the other view kinds ignore the value.
    template< class TIface >
    inline void View::UpdateMinLOD(float /*MinLOD*/)
    {
        // Do nothing
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Returns the byte offset of subresource 0's placement, or 0 when pBuffer is null.
    inline UINT GetDynamicBufferOffset(Resource* pBuffer)
    {
        UINT64 offset = pBuffer ? pBuffer->GetSubresourcePlacement(0).Offset : 0;
        assert(offset < (UINT)-1); // D3D11 resources shouldn't be able to produce offsetable buffers of more than UINT_MAX
        return (UINT)offset;
    }

    // Size remaining after `offset`, measured against the footprint Width of subresource 0;
    // returns 0 when the offset is past the width. Unlike GetDynamicBufferOffset, pBuffer is
    // dereferenced unconditionally.
    // NOTE(review): the template parameter list appears to be missing in this copy of the file.
    template
    inline UINT GetDynamicBufferSize(Resource* pBuffer, UINT offset)
    {
        UINT width = pBuffer->GetSubresourcePlacement(0).Footprint.Width;
        return offset > width ? 0 : width - offset;
    }

    // Specialization that measures against the footprint RowPitch instead of Width.
    template<>
    inline UINT GetDynamicBufferSize(Resource* pBuffer, UINT offset)
    {
        UINT pitch = pBuffer->GetSubresourcePlacement(0).Footprint.RowPitch;
        return offset > pitch ? 0 : pitch - offset;
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Records the binding on the view and on the resource, then asks the context to transition
    // the resource for its bindings. Note the (stage, slot) argument order of the callees.
    template
    void View::ViewBound(UINT slot, EShaderStage stage) noexcept
    {
        m_currentBindings.ViewBound(stage, slot);
        m_pResource->ViewBound(this, stage, slot);
        m_pParent->TransitionResourceForBindings(this);
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Mirror image of ViewBound: clears the records, then re-evaluates the resource transition.
    template
    void View::ViewUnbound(UINT slot, EShaderStage stage) noexcept
    {
        m_currentBindings.ViewUnbound(stage, slot);
        m_pResource->ViewUnbound(this, stage, slot);
        m_pParent->TransitionResourceForBindings(this);
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Returns the D3D12 description. For buffer resources whose description has a Buffer member,
    // FirstElement is first recomputed as APIFirstElement plus the resource's current byte offset
    // expressed in elements (structure stride when non-zero, otherwise the format's byte alignment).
    template
    const typename View::TDesc12& View::GetDesc12() noexcept
    {
        // __if_exists is a Microsoft compiler extension; the block is compiled only when the member exists.
        __if_exists(TDesc12::Buffer)
        {
            typedef decltype(TDesc12::Buffer) TBufferDesc12;
            if (m_pResource->AppDesc()->ResourceDimension() == static_cast(D3D11_RESOURCE_DIMENSION_BUFFER))
            {
                UINT Divisor = GetByteAlignment(m_Desc.Format);
                __if_exists(TBufferDesc12::StructureByteStride)
                {
                    if (m_Desc.Buffer.StructureByteStride != 0)
                    {
                        Divisor = m_Desc.Buffer.StructureByteStride;
                    }
                }
                UINT ByteOffset = GetDynamicBufferOffset(m_pResource);
                // The byte offset is expected to be a whole number of elements.
                assert(ByteOffset % Divisor == 0);
                m_Desc.Buffer.FirstElement = APIFirstElement + ByteOffset / Divisor;
            }
        }
        return m_Desc;
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // True when the descriptor was last written for the resource's current uniqueness value.
    template
    bool View::IsUpToDate() const noexcept
    {
        return m_pResource->GetUniqueness() == m_ViewUniqueness;
    }
//---------------------------------------------------------------------------------------------------------------------------------- template HRESULT View::RefreshUnderlying() noexcept { if (m_ViewUniqueness != m_pResource->GetUniqueness()) { UpdateMinLOD(m_pResource->GetMinLOD()); const TDesc12 &Desc = GetDesc12(); (m_pParent->m_pDevice12.get()->*CViewMapper::GetCreate())( m_pResource->GetUnderlyingResource(), &Desc, m_Descriptor); m_ViewUniqueness = m_pResource->GetUniqueness(); return S_OK; } return S_FALSE; } //---------------------------------------------------------------------------------------------------------------------------------- // Specialized because ID3D12Device::CreateUnorderedAccessView takes 2 resources as input template<> inline HRESULT View::RefreshUnderlying() noexcept { // UAVs are always refreshed (to ensure that the proper counter resource is passed in) UAV* p11on12UAV = static_cast(this); const TDesc12 &Desc = GetDesc12(); m_pParent->m_pDevice12.get()->CreateUnorderedAccessView( m_pResource->GetUnderlyingResource(), p11on12UAV->m_pCounterResource.get(), &Desc, m_Descriptor ); m_ViewUniqueness = m_pResource->GetUniqueness(); return S_OK; } //---------------------------------------------------------------------------------------------------------------------------------- // Specialized because no underlying D3D12 video views template<> inline HRESULT View::RefreshUnderlying() noexcept { m_ViewUniqueness = m_pResource->GetUniqueness(); return S_OK; } template<> inline HRESULT View::RefreshUnderlying() noexcept { m_ViewUniqueness = m_pResource->GetUniqueness(); return S_OK; } template<> inline HRESULT View::RefreshUnderlying() noexcept { m_ViewUniqueness = m_pResource->GetUniqueness(); return S_OK; } //---------------------------------------------------------------------------------------------------------------------------------- inline ViewBase::ViewBase(ImmediateContext* pDevice, Resource* pResource, CViewSubresourceSubset const& 
Subresources) noexcept : DeviceChild(pDevice) , m_pResource(pResource) , m_ViewUniqueness(UINT_MAX) , m_subresources(Subresources) { } //---------------------------------------------------------------------------------------------------------------------------------- template View::View(ImmediateContext* pDevice, const typename TDesc12 &Desc, Resource &ViewResource) noexcept(false) : ViewBase(pDevice, &ViewResource, CViewSubresourceSubset(Desc, (UINT8)ViewResource.AppDesc()->MipLevels(), (UINT16)ViewResource.AppDesc()->ArraySize(), (UINT8)ViewResource.AppDesc()->NonOpaquePlaneCount() * ViewResource.SubresourceMultiplier())), m_Desc(Desc), m_BindRefs(0), APIFirstElement(0) { __if_exists(TDesc12::Buffer) { APIFirstElement = Desc.Buffer.FirstElement; } m_Descriptor = pDevice->GetViewAllocator().AllocateHeapSlot(&m_DescriptorHeapIndex); // throw( _com_error ) } //---------------------------------------------------------------------------------------------------------------------------------- template View::~View() noexcept { m_pParent->GetViewAllocator().FreeHeapSlot(m_Descriptor, m_DescriptorHeapIndex); } //---------------------------------------------------------------------------------------------------------------------------------- // Specialized because no underlying D3D12 video views template<> inline View::View(ImmediateContext* pDevice, const typename TDesc12 &Desc, Resource &ViewResource) noexcept(false) : ViewBase(pDevice, &ViewResource, CViewSubresourceSubset(Desc, (UINT8)ViewResource.AppDesc()->MipLevels(), (UINT16)ViewResource.AppDesc()->ArraySize(), (UINT8)ViewResource.AppDesc()->NonOpaquePlaneCount() * ViewResource.SubresourceMultiplier())), m_Desc(Desc), m_BindRefs(0) { } //---------------------------------------------------------------------------------------------------------------------------------- template<> inline View::~View() noexcept { } 
//---------------------------------------------------------------------------------------------------------------------------------- template<> inline View::View(ImmediateContext* pDevice, const typename TDesc12 &Desc, Resource &ViewResource) noexcept(false) : ViewBase(pDevice, &ViewResource, CViewSubresourceSubset(Desc, (UINT8)ViewResource.AppDesc()->MipLevels(), (UINT16)ViewResource.AppDesc()->ArraySize(), (UINT8)ViewResource.AppDesc()->NonOpaquePlaneCount() * ViewResource.SubresourceMultiplier())), m_Desc(Desc), m_BindRefs(0) { } //---------------------------------------------------------------------------------------------------------------------------------- template<> inline View::~View() noexcept { } //---------------------------------------------------------------------------------------------------------------------------------- template<> inline View::View(ImmediateContext* pDevice, const typename TDesc12 &Desc, Resource &ViewResource) noexcept(false) : ViewBase(pDevice, &ViewResource, CViewSubresourceSubset(Desc, (UINT8)ViewResource.AppDesc()->MipLevels(), (UINT16)ViewResource.AppDesc()->ArraySize(), (UINT8)ViewResource.AppDesc()->NonOpaquePlaneCount() * ViewResource.SubresourceMultiplier())), m_Desc(Desc), m_BindRefs(0) { } //---------------------------------------------------------------------------------------------------------------------------------- template<> inline View::~View() noexcept { } }; ================================================ FILE: include/XPlatHelpers.h ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#pragma once namespace XPlatHelpers { #ifdef _WIN32 using Event = HANDLE; constexpr Event InvalidEvent = nullptr; inline void SetEvent(Event e) { ::SetEvent(e); } inline Event CreateEvent() { return ::CreateEvent(nullptr, false, false, nullptr); } inline bool WaitForEvent(Event e, DWORD timeoutMs) { return WaitForSingleObject(e, timeoutMs) == WAIT_OBJECT_0; } inline bool WaitForEvent(Event e) { return WaitForEvent(e, INFINITE); } inline Event DuplicateEvent(Event e) { Event eNew = nullptr; (void)DuplicateHandle(GetCurrentProcess(), e, GetCurrentProcess(), &eNew, 0, FALSE, DUPLICATE_SAME_ACCESS); return eNew; } inline void CloseEvent(Event e) { CloseHandle(e); } inline Event EventFromHANDLE(HANDLE h) { return h; } #else using Event = int; constexpr Event InvalidEvent = -1; inline void SetEvent(Event e) { eventfd_write(e, 1); } inline Event CreateEvent() { return eventfd(0, 0); } inline bool WaitForEvent(Event e) { eventfd_t val; return eventfd_read(e, &val) == 0; } inline bool WaitForEvent(Event e, int timeoutMs) { pollfd fds = { e, POLLIN, 0 }; if (poll(&fds, 1, timeoutMs) && (fds.revents & POLLIN)) { return WaitForEvent(e); } return false; } inline Event DuplicateEvent(Event e) { return dup(e); } inline void CloseEvent(Event e) { close(e); } inline Event EventFromHANDLE(HANDLE h) { return static_cast(reinterpret_cast(h)); } #endif class unique_event { Event m_event = InvalidEvent; public: struct copy_tag {}; unique_event() = default; unique_event(Event e) : m_event(e) { } unique_event(Event e, copy_tag) : m_event(DuplicateEvent(e)) { } unique_event(unique_event&& e) : m_event(e.detach()) { } unique_event& operator=(unique_event&& e) { close(); m_event = e.detach(); return *this; } ~unique_event() { close(); } void close() { if (*this) { CloseEvent(m_event); } m_event = InvalidEvent; } void reset(Event e = InvalidEvent) { close(); m_event = e; } void create() { reset(CreateEvent()); } Event get() { return m_event; } Event detach() { Event e = m_event; m_event = 
InvalidEvent; return e; } void set() const { SetEvent(m_event); } bool poll() const { return WaitForEvent(m_event, 0); } void wait() const { WaitForEvent(m_event); } operator bool() const { return m_event != InvalidEvent; } }; // This class relies on the fact that modules are void* in both, and using the same Windows API names in the Linux Windows.h. class unique_module { HMODULE _hM = nullptr; public: unique_module() = default; explicit unique_module(HMODULE hM) : _hM(hM) { } explicit unique_module(const char* pCStr) : _hM(LoadLibraryA(pCStr)) { } #ifdef _WIN32 explicit unique_module(const wchar_t* pWStr) : _hM(LoadLibraryW(pWStr)) { } #else explicit unique_module(const wchar_t* pWStr) : _hM(LoadLibraryA(std::wstring_convert>().to_bytes(pWStr).c_str())) { } #endif void reset(HMODULE hM = nullptr) { if (_hM) FreeLibrary(_hM); _hM = hM; } void load(const char* pCStr) { reset(LoadLibraryA(pCStr)); } #ifdef _WIN32 void load(const wchar_t* pWStr) { reset(LoadLibraryW(pWStr)); } #else void load(const wchar_t* pWStr) { *this = unique_module(pWStr); } #endif HMODULE detach() { HMODULE hM = _hM; _hM = nullptr; return hM; } ~unique_module() { reset(); } unique_module(unique_module&& o) : _hM(o.detach()) { } unique_module& operator=(unique_module&& o) { reset(o.detach()); return *this; } HMODULE* get_for_external_load() { reset(); return &_hM; } HMODULE get() const { return _hM; } operator bool() const { return _hM != nullptr; } void* proc_address(const char* pCStr) const { return GetProcAddress(_hM, pCStr); } template T proc_address(const char* pCStr) const { return reinterpret_cast(proc_address(pCStr)); } }; } ================================================ FILE: include/commandlistmanager.inl ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#pragma once namespace D3D12TranslationLayer { //---------------------------------------------------------------------------------------------------------------------------------- // This allows one to atomically read 64 bit values inline LONGLONG InterlockedRead64(volatile LONGLONG* p) { return InterlockedCompareExchange64(p, 0, 0); } }; ================================================ FILE: include/pch.h ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once #include #include ================================================ FILE: include/segmented_stack.h ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #pragma once struct noop_unary { void* operator()(bool) noexcept { return nullptr; } }; //================================================================================================================================== // // segmented_stack // //================================================================================================================================== template< class T, size_t segment_size = 256, typename unary = noop_unary > class segmented_stack { public: typedef T value_type; typedef T* pointer; typedef const T* const_pointer; typedef T& reference; typedef const T& const_reference; typedef size_t size_type; typedef ptrdiff_t difference_type; explicit segmented_stack(const std::nothrow_t&, const unary& notifier = unary()); explicit segmented_stack(const unary& notifier = unary()); // throw( bad_alloc ) void free(); segmented_stack& operator=(segmented_stack&&); segmented_stack(segmented_stack&&); ~segmented_stack(); void set_notifier(const unary&); size_type max_size() const; size_type size() const; size_type capacity() const; void reserve(size_type); // throw( bad_alloc ) void push(const value_type&); // throw( ... 
) bool reserve_contiguous(size_type) noexcept; pointer append_contiguous_manually() noexcept; pointer append_contiguous_manually(size_type) noexcept; void swap(segmented_stack< T, segment_size, unary >&); void clear(); bool empty(); class segment_range { protected: friend class segmented_stack; explicit segment_range(void* p) : m_begin(static_cast< pointer >(p)), m_end(static_cast< pointer >(p)) { } pointer m_begin; pointer m_end; public: typedef pointer iterator; iterator begin(); iterator end(); typedef const_pointer const_iterator; const_iterator begin() const; const_iterator end() const; }; typedef std::vector< segment_range > segment_vector; typedef typename segment_vector::iterator segment_iterator; segment_iterator segments_begin(); segment_iterator segments_end(); typedef typename segment_vector::const_iterator const_segment_iterator; const_segment_iterator segments_begin() const; const_segment_iterator segments_end() const; void reserve_additional_segment(); // throw( bad_alloc ) bool reserve_contiguous_alloc(size_type) noexcept; segment_vector m_segments; typename segment_vector::iterator m_last_segment; unary m_notifier; }; //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline segmented_stack< T, segment_size, unary >::segmented_stack(const std::nothrow_t&, const unary& notifier) : m_notifier(notifier) { // Usage of this constructor is dangerous, as m_last_segment doesn't point to anything allocated, while push assumes it does. 
m_last_segment = m_segments.begin(); } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline segmented_stack< T, segment_size, unary >::segmented_stack(const unary& notifier) : // throw( bad_alloc ) m_notifier(notifier) { m_segments.reserve(8); // throw( bad_alloc ) m_last_segment = m_segments.begin(); reserve_additional_segment(); // throw( bad_alloc ) m_last_segment = m_segments.begin(); } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline void segmented_stack< T, segment_size, unary >::free() { for (auto segment = m_segments.begin(); segment != m_segments.end(); ++segment) { for (auto it = segment->m_begin; it != segment->m_end; ++it) { it->~T(); } operator delete(segment->m_begin); } } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline auto segmented_stack< T, segment_size, unary >::operator=(segmented_stack&& o) -> segmented_stack& { free(); size_t index = std::distance(o.m_segments.begin(), o.m_last_segment); m_segments = std::move(o.m_segments); m_last_segment = m_segments.begin() + index; m_notifier = std::move(o.m_notifier); return *this; } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline segmented_stack< T, segment_size, unary >::segmented_stack(segmented_stack&& o) : m_notifier(std::move(o.m_notifier)) { size_t index = std::distance(o.m_segments.begin(), o.m_last_segment); m_segments = std::move(o.m_segments); m_last_segment = m_segments.begin() + index; } 
//---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline segmented_stack< T, segment_size, unary >::~segmented_stack() { free(); } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline void segmented_stack< T, segment_size, unary >::set_notifier(const unary& notifier) { m_notifier = notifier; } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline typename segmented_stack< T, segment_size, unary >::size_type segmented_stack< T, segment_size, unary >::max_size() const { return size_t(-1) / sizeof(T); } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline typename segmented_stack< T, segment_size, unary >::size_type segmented_stack< T, segment_size, unary >::size() const { return (m_last_segment - m_segments.begin()) * segment_size + (m_last_segment != m_segments.end() ? 
m_last_segment->m_end - m_last_segment->m_begin : 0); } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline typename segmented_stack< T, segment_size, unary >::size_type segmented_stack< T, segment_size, unary >::capacity() const { return m_segments.size() * segment_size; } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline void segmented_stack< T, segment_size, unary >::reserve_additional_segment() // throw( bad_alloc ) { if (m_segments.size() >= m_segments.capacity()) { const size_t cur_capacity = m_segments.capacity(); const size_t max_capacity = m_segments.max_size(); if (capacity() > max_size() - segment_size || cur_capacity >= max_capacity) { throw std::bad_alloc(); } const size_t last_segment = m_last_segment - m_segments.begin(); // Record relative offset before re-allocation. size_t add_capacity = min(max(cur_capacity / 2, size_t(1)), max_capacity - cur_capacity); // STL default policy while (add_capacity) { if (1 == add_capacity) { m_segments.reserve(cur_capacity + add_capacity); // throw( bad_alloc ) break; } else { try { m_segments.reserve(cur_capacity + add_capacity); // throw( bad_alloc ) break; } catch (std::bad_alloc&) { // Try less. 
} add_capacity /= 2; } } m_last_segment = m_segments.begin() + last_segment; } void* p = m_notifier(true); if (!p) { p = operator new(segment_size * sizeof(T)); // throw( bad_alloc ) } m_segments.push_back(segment_range(p)); } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline void segmented_stack< T, segment_size, unary >::reserve(size_type s) // throw( bad_alloc ) { while (s > capacity()) { reserve_additional_segment(); // throw( bad_alloc ) } } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline void segmented_stack< T, segment_size, unary >::push(const value_type& v) // throw( ... ) { if (m_last_segment->m_end >= m_last_segment->m_begin + segment_size) { if (m_last_segment + 1 == m_segments.end()) { reserve_additional_segment(); // throw( bad_alloc ) } ++m_last_segment; } T*& pEnd = m_last_segment->m_end; ASSUME(pEnd); new (pEnd) value_type(v); // throw( ... 
) ++(pEnd); } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline bool segmented_stack< T, segment_size, unary >::reserve_contiguous(size_type s) noexcept { assert(s <= segment_size); if (size_t(m_last_segment->m_begin + segment_size - m_last_segment->m_end) < s) { return reserve_contiguous_alloc(s); } return true; } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline typename segmented_stack< T, segment_size, unary >::pointer segmented_stack< T, segment_size, unary >::append_contiguous_manually() noexcept { return m_last_segment->m_end; } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline typename segmented_stack< T, segment_size, unary >::pointer segmented_stack< T, segment_size, unary >::append_contiguous_manually(size_type s) noexcept { // Check that reserve_contiguous was called before and was successful: assert(size_t(m_last_segment->m_begin + segment_size - m_last_segment->m_end) >= s); T*& pEnd = m_last_segment->m_end; pointer p = pEnd; pEnd += s; return p; } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > __declspec(noinline) inline bool segmented_stack< T, segment_size, unary >::reserve_contiguous_alloc(size_type /*s*/) noexcept { try { if (m_last_segment + 1 == m_segments.end()) { reserve_additional_segment(); // throw( bad_alloc ) } // This effectively pushes undefined values, until the segment transitions to a new empty one. 
// However, the segment's end pointer is untouched, so that those undefined values can be skipped over // with the appropriate iterators. ++m_last_segment; return true; } catch (std::bad_alloc&) { m_notifier(false); return false; } } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline void segmented_stack< T, segment_size, unary >::swap(segmented_stack< T, segment_size, unary >& o) { m_segments.swap(o.m_segments); std::swap(m_last_segment, o.m_last_segment); std::swap(m_notifier, o.m_notifier); } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline void segmented_stack< T, segment_size, unary >::clear() { for (auto segment = m_segments.begin(); segment != m_segments.end(); ++segment) { for (auto it = segment->m_begin; it != segment->m_end; ++it) { it->~T(); } segment->m_end = segment->m_begin; } m_last_segment = m_segments.begin(); } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline bool segmented_stack< T, segment_size, unary >::empty() { return m_last_segment == m_segments.begin() && (m_last_segment == m_segments.end() || m_last_segment->m_end == m_last_segment->m_begin); } //---------------------------------------------------------------------------------------------------------------------------------- template< class T, size_t segment_size, typename unary > inline typename segmented_stack< T, segment_size, unary >::segment_iterator segmented_stack< T, segment_size, unary >::segments_begin() { return m_segments.begin(); } 
//----------------------------------------------------------------------------------------------------------------------------------
// One past the segment currently being appended to (segments after it are allocated but unused).
template< class T, size_t segment_size, typename unary >
inline typename segmented_stack< T, segment_size, unary >::segment_iterator segmented_stack< T, segment_size, unary >::segments_end()
{
    return m_last_segment == m_segments.end() ? m_last_segment : m_last_segment + 1;
}

//----------------------------------------------------------------------------------------------------------------------------------
// First segment (const overload).
template< class T, size_t segment_size, typename unary >
inline typename segmented_stack< T, segment_size, unary >::const_segment_iterator segmented_stack< T, segment_size, unary >::segments_begin() const
{
    return m_segments.begin();
}

//----------------------------------------------------------------------------------------------------------------------------------
// One past the segment currently being appended to (const overload).
template< class T, size_t segment_size, typename unary >
inline typename segmented_stack< T, segment_size, unary >::const_segment_iterator segmented_stack< T, segment_size, unary >::segments_end() const
{
    return m_last_segment == m_segments.end() ? m_last_segment : m_last_segment + 1;
}

//----------------------------------------------------------------------------------------------------------------------------------
// First element slot of the segment.
template< class T, size_t segment_size, typename unary >
inline typename segmented_stack< T, segment_size, unary >::segment_range::iterator segmented_stack< T, segment_size, unary >::segment_range::begin()
{
    return m_begin;
}

//----------------------------------------------------------------------------------------------------------------------------------
// One past the last appended slot of the segment.
template< class T, size_t segment_size, typename unary >
inline typename segmented_stack< T, segment_size, unary >::segment_range::iterator segmented_stack< T, segment_size, unary >::segment_range::end()
{
    return m_end;
}

//----------------------------------------------------------------------------------------------------------------------------------
// First element slot of the segment (const overload).
template< class T, size_t segment_size, typename unary >
inline typename segmented_stack< T, segment_size, unary >::segment_range::const_iterator segmented_stack< T, segment_size, unary >::segment_range::begin() const
{
    return m_begin;
}

//----------------------------------------------------------------------------------------------------------------------------------
// One past the last appended slot of the segment (const overload).
template< class T, size_t segment_size, typename unary >
inline typename segmented_stack< T, segment_size, unary >::segment_range::const_iterator segmented_stack< T, segment_size, unary >::segment_range::end() const
{
    return m_end;
}

================================================
FILE: packages.config
================================================


================================================
FILE: scripts/BlitHelperShaders.hlsl
================================================
#define RootSig "RootFlags( DENY_GEOMETRY_SHADER_ROOT_ACCESS | " \
                "DENY_DOMAIN_SHADER_ROOT_ACCESS | " \
                "DENY_HULL_SHADER_ROOT_ACCESS), " \
                "DescriptorTable (SRV(t0, numDescriptors=3), visibility=SHADER_VISIBILITY_PIXEL)," /* [0] SRVs for the possible 3 planes */ \
                "RootConstants(num32BitConstants=6, b1, visibility=SHADER_VISIBILITY_VERTEX)," /* [1] src info */ \
                "RootConstants(num32BitConstants=1, b2, visibility=SHADER_VISIBILITY_PIXEL)," /* [2] pixel format info */ \
                "StaticSampler(s0, filter=FILTER_MIN_MAG_LINEAR_MIP_POINT, addressU = TEXTURE_ADDRESS_CLAMP, addressV = TEXTURE_ADDRESS_CLAMP, addressW = TEXTURE_ADDRESS_CLAMP, visibility=SHADER_VISIBILITY_PIXEL)" /* static sampler s0 */ \

// Source rectangle and source size, in texel coordinates
cbuffer srcInfo: register(b1)
{
    int g_srcLeft;
    int g_srcRight;
    int g_srcTop;
    int g_srcBottom;
    int g_srcWidth;
    int g_srcHeight;
};

// Factor applied to the sampled YUV values in PS2PlaneYUV
cbuffer pixelInfo: register(b2)
{
    int g_srcPixelScalingFactor;
};

struct VSOutPSIn
{
    float4 position : SV_Position;
    float2 texcoordsNorm : TEXCOORD0; // texcoords divided by the source size
    int2 texcoords : TEXCOORD1;       // source position in texels
};

SamplerState g_linearSampler : register(s0); // Linear sampler

// Uses given position from constant buffer.
// Vertex IDs 0..3 map to the top-left, top-right, bottom-left and bottom-right clip-space corners,
// each paired with the matching corner of the source rectangle.
[RootSignature(RootSig)]
VSOutPSIn VSMain(dword input : SV_VertexID)
{
    VSOutPSIn ret;
    switch (input)
    {
    case 0:
        ret.position = float4(-1.0f, 1.0f, 0.5f, 1.0f);
        ret.texcoords = int2(g_srcLeft, g_srcTop);
        break;
    case 1:
        ret.position = float4(1.0f, 1.0f, 0.5f, 1.0f);
        ret.texcoords = int2(g_srcRight, g_srcTop);
        break;
    case 2:
        ret.position = float4(-1.0f, -1.0f, 0.5f, 1.0f);
        ret.texcoords = int2(g_srcLeft, g_srcBottom);
        break;
    case 3:
        ret.position = float4(1.0f, -1.0f, 0.5f, 1.0f);
        ret.texcoords = int2(g_srcRight, g_srcBottom);
        break;
    }
    ret.texcoordsNorm = ret.texcoords / float2(g_srcWidth, g_srcHeight);
    return ret;
}

// The texture in an arbitrary format taken from the target process
Texture2D inputTexture : register(t0);

// Basic visualization, works for image data
[RootSignature(RootSig)]
float4 PSBasic(VSOutPSIn input) : SV_Target
{
    return inputTexture.Sample(g_linearSampler, input.texcoordsNorm);
}

// Same as PSBasic with the red and blue channels exchanged
[RootSignature(RootSig)]
float4 PSBasic_SwapRB(VSOutPSIn input) : SV_Target
{
    return inputTexture.Sample(g_linearSampler, input.texcoordsNorm).zyxw;
}

// Replicates the sampled alpha into RGB and outputs it fully opaque.
// NOTE(review): CompileBlitHelperShaders.cmd has no fxc line for PSAlphaOnly or PSDepth - confirm whether that is intentional.
[RootSignature(RootSig)]
float4 PSAlphaOnly(VSOutPSIn input) : SV_Target
{
    float4 color = inputTexture.Sample(g_linearSampler, input.texcoordsNorm);
    color.rgb = color.a;
    color.a = 1.0;
    return color;
}

// Replicates the sampled red channel into RGB (alpha 1) and outputs the component-wise square
[RootSignature(RootSig)]
float4 PSDepth(VSOutPSIn input) : SV_Target
{
    float4 rawData = float4(inputTexture.Sample(g_linearSampler, input.texcoordsNorm).rrr, 1);
    return rawData * rawData;
}

// Converts a normalized (Y, U, V, A) value to RGBA using integer-style coefficients
// (C = Y - 16, D = U - 128, E = V - 128 on a 0..256 scale). Alpha is passed through.
float4 YUVToRGB(float4 YUVA)
{
    float C = (YUVA.r * 256) - 16;
    float D = (YUVA.g * 256) - 128;
    float E = (YUVA.b * 256) - 128;

    float R = clamp(( 298 * C + 409 * E + 128) / 256, 0, 256);
    float G = clamp(( 298 * C - 100 * D - 208 * E + 128) / 256, 0, 256);
    float B = clamp(( 298 * C + 516 * D + 128) / 256, 0, 256);

    return float4(R / 256, G / 256, B / 256, YUVA.a);
}

// DXGI_FORMAT_AYUV -> float4(V, U, Y, A) (no subsampling)
[RootSignature(RootSig)]
float4 PSAYUV(VSOutPSIn input) : SV_TARGET
{
    float4 YUVA = inputTexture.Sample(g_linearSampler, input.texcoordsNorm).bgra;
    return YUVToRGB(YUVA);
}

// DXGI_FORMAT_Y410/Y416 -> float4(U, Y, V, A) (no subsampling)
[RootSignature(RootSig)]
float4 PSY4XX(VSOutPSIn input) : SV_TARGET
{
    float4 YUVA = inputTexture.Sample(g_linearSampler, input.texcoordsNorm).grba;
    return YUVToRGB(YUVA);
}

// DXGI_FORMAT_YUY2/Y210/Y216 -> float4(Y0, U, Y1, V) (4:2:2 subsampled)
[RootSignature(RootSig)]
float4 PSPackedYUV(VSOutPSIn input) : SV_TARGET
{
    float4 YUYV = inputTexture.Sample(g_linearSampler, float2(input.texcoordsNorm.x, input.texcoordsNorm.y));
    // Even source columns use Y0, odd columns use Y1; U and V are shared by the pair.
    float4 YUVA = float4(input.texcoords.x % 2 == 0 ? YUYV.r : YUYV.b, YUYV.ga, 1);
    return YUVToRGB(YUVA);
}

Texture2D inputTexturePlane1 : register(t1);
Texture2D inputTexturePlane2 : register(t2);

// DXGI_FORMAT_NV12/NV11/P010/P016/P208/420_OPAQUE -> t0.r = Y, t1.rg = UV (4:2:0, 4:2:2, or 4:1:1 subsampled)
[RootSignature(RootSig)]
float4 PS2PlaneYUV(VSOutPSIn input) : SV_TARGET
{
    float3 inputYUV = float3(inputTexture.Sample(g_linearSampler, input.texcoordsNorm).r,
                             inputTexturePlane1.Sample(g_linearSampler, input.texcoordsNorm).rg);
    float4 scaledYUVA = float4(inputYUV.r * g_srcPixelScalingFactor,
                               inputYUV.g * g_srcPixelScalingFactor,
                               inputYUV.b * g_srcPixelScalingFactor,
                               1);
    return YUVToRGB(scaledYUVA);
}

// DXGI_FORMAT_V208/V408 -> t0.r = Y, t1.r = U, t2.r = V (4:4:0 or 4:4:4 subsampled)
[RootSignature(RootSig)]
float4 PS3PlaneYUV(VSOutPSIn input) : SV_TARGET
{
    float4 YUVA = float4(inputTexture.Sample(g_linearSampler, input.texcoordsNorm).r,
                         inputTexturePlane1.Sample(g_linearSampler, input.texcoordsNorm).r,
                         inputTexturePlane2.Sample(g_linearSampler, input.texcoordsNorm).r,
                         1);
    return YUVToRGB(YUVA);
}

================================================
FILE: scripts/CompileBlitHelperShaders.cmd
================================================
del tmp.txt
del BlitHelperShaders.h
fxc /Tvs_5_1 /EVSMain /Vn g_VSMain BlitHelperShaders.hlsl /Fh tmp.txt
type tmp.txt >> BlitHelperShaders.h
fxc /Tps_5_1 /EPSBasic /Vn g_PSBasic BlitHelperShaders.hlsl /Fh tmp.txt
type tmp.txt >> BlitHelperShaders.h
fxc /Tps_5_1 /EPSBasic_SwapRB /Vn g_PSBasic_SwapRB BlitHelperShaders.hlsl /Fh tmp.txt
type tmp.txt >> BlitHelperShaders.h
fxc /Tps_5_1 /EPSAYUV /Vn g_PSAYUV BlitHelperShaders.hlsl /Fh tmp.txt
type tmp.txt >> BlitHelperShaders.h
fxc /Tps_5_1 /EPSY4XX /Vn g_PSY4XX BlitHelperShaders.hlsl /Fh tmp.txt
type tmp.txt >> BlitHelperShaders.h
fxc /Tps_5_1 /EPSPackedYUV /Vn g_PSPackedYUV BlitHelperShaders.hlsl /Fh tmp.txt
type tmp.txt >> BlitHelperShaders.h
fxc /Tps_5_1 /EPS2PlaneYUV /Vn g_PS2PlaneYUV BlitHelperShaders.hlsl /Fh tmp.txt
type tmp.txt >> BlitHelperShaders.h
fxc /Tps_5_1 /EPS3PlaneYUV /Vn g_PS3PlaneYUV BlitHelperShaders.hlsl /Fh tmp.txt
type tmp.txt >> BlitHelperShaders.h
del tmp.txt

================================================
FILE: scripts/CompileVideoProcessShaders.cmd
================================================
del tmp.txt
del VideoProcessShaders.h
fxc /Tvs_5_1 /EVSMain /Vn g_DeinterlaceVS DeinterlaceShader.hlsl /Fh tmp.txt
type tmp.txt >> VideoProcessShaders.h
fxc /Tps_5_1 /EPSMain /Vn g_DeinterlacePS DeinterlaceShader.hlsl /Fh tmp.txt
type tmp.txt >> VideoProcessShaders.h
del tmp.txt

================================================
FILE: scripts/DeinterlaceShader.hlsl
================================================
#define RootSig "RootFlags( DENY_VERTEX_SHADER_ROOT_ACCESS | " \
                "DENY_GEOMETRY_SHADER_ROOT_ACCESS | " \
                "DENY_DOMAIN_SHADER_ROOT_ACCESS | " \
                "DENY_HULL_SHADER_ROOT_ACCESS), " \
                "RootConstants(num32BitConstants=1, b0), " \
                "DescriptorTable (SRV(t0) )"

// Fullscreen quad
static const float4 positions[] = {
    float4(-1, 1, 0.5, 1),  // top left
    float4(1, 1, 0.5, 1),   // top right
    float4(-1, -1, 0.5, 1), // bottom left
    float4(1, -1, 0.5, 1),  // bottom right
};

// topFrame selects which rows are read: 1 -> even rows, 0 -> odd rows (see PSMain)
cbuffer DeinterlaceConstants : register(b0)
{
    uint topFrame;
};

struct VSOutPSIn
{
    float4 position : SV_Position;
};

// Should be called with no VB to draw 4 vertices for a fullscreen quad
[RootSignature(RootSig)]
VSOutPSIn VSMain(dword input : SV_VertexID)
{
    VSOutPSIn ret;
    ret.position = positions[input];
    return ret;
}

// NOTE(review): any element type on this declaration may have been lost in extraction - confirm against the repository.
Texture2DArray inputTexture : register(t0);

// Loads each output pixel from a row of the selected field:
// with topFrame == 1 odd rows read the even row above them,
// with topFrame == 0 even rows read the odd row below them.
[RootSignature(RootSig)]
uint4 PSMain(VSOutPSIn input) : SV_Target
{
    uint4 pos = uint4(input.position.xy, 0, 0);
    pos.y -= (pos.y % 2) * topFrame;
    pos.y += (1 - (pos.y % 2)) * (1 - topFrame);
    return inputTexture.Load(pos);
}

================================================
FILE: src/Allocator.cpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include "pch.h" namespace D3D12TranslationLayer { ID3D12Resource* InternalHeapAllocator::Allocate(UINT64 size) { // Transfer ownership of ID3D12Resource to the calling allocator return m_pContext->AcquireTransitionableUploadBuffer(m_HeapType, size).release(); } void InternalHeapAllocator::Deallocate(ID3D12Resource* pResource) { m_pContext->ReturnTransitionableBufferToPool( m_HeapType, pResource->GetDesc().Width, std::move(unique_comptr(pResource)), // Guaranteed to be finished since this is only called after // all suballocations have been through the deferred deletion queue m_pContext->GetCompletedFenceValue(CommandListType(m_HeapType))); // Leave ownership to the buffer pool pResource->Release(); } } ================================================ FILE: src/BatchedContext.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #include "pch.h" namespace D3D12TranslationLayer { //---------------------------------------------------------------------------------------------------------------------------------- // Ptr on input points to current command, and on output points to next command template TCmd const& GetCommandData(const void*& pPtrToCommandValue) { // Ensure this overall structure is aligned to return pointer to next structure. Internal alignment handled by the compiler. 
struct alignas(BatchedContext::BatchPrimitive)Temp { UINT CommandValue; TCmd Command; }; Temp const* pPtr = reinterpret_cast(pPtrToCommandValue); pPtrToCommandValue = pPtr + 1; return pPtr->Command; } // Ptr on input points to current command, and on output points to next command template TCmd const& GetCommandDataVariableSize(const void*& pPtrToCommandValue, UINT TEntryCountType::*NumEntries, TEntry const*& entries) { // Compiler ensures that pointer to first entry is aligned correctly struct Temp { UINT CommandValue; TCmd Command; TEntry FirstEntry; }; Temp const* pPtr = reinterpret_cast(pPtrToCommandValue); entries = pPtr->Command.*NumEntries ? &pPtr->FirstEntry : nullptr; // Manually align the pointer to the next command. pPtrToCommandValue = BatchedContext::AlignPtr(&pPtr->FirstEntry + pPtr->Command.*NumEntries); return pPtr->Command; } template struct CommandDispatcher; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.SetPipelineState(Data.pPSO); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.DrawInstanced(Data.countPerInstance, Data.instanceCount, Data.vertexStart, Data.instanceStart); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.DrawIndexedInstanced(Data.countPerInstance, Data.instanceCount, Data.indexStart, Data.vertexStart, Data.instanceStart); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.Dispatch(Data.x, Data.y, Data.z); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { (void)GetCommandData(pCommandData); 
ImmCtx.DrawAuto(); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.DrawInstancedIndirect(Data.pBuffer, Data.offset); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.DrawIndexedInstancedIndirect(Data.pBuffer, Data.offset); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.DispatchIndirect(Data.pBuffer, Data.offset); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.IaSetTopology(Data.topology); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { struct Temp { BatchedContext::CmdSetVertexBuffers Cmd; Resource* pFirstVB; } const* pTemp = reinterpret_cast(pCommandData); BatchedContext::CmdSetVertexBuffers const* pCmd = &pTemp->Cmd; auto ppVBs = &pTemp->pFirstVB; // Ptr guaranteed to be aligned because alignof(UINT) <= alignof(Resource*) auto pStrides = reinterpret_cast(ppVBs + pCmd->numVBs); auto pOffsets = pStrides + pCmd->numVBs; // Align pointer to next command pCommandData = BatchedContext::AlignPtr(pOffsets + pCmd->numVBs); ImmCtx.IaSetVertexBuffers(pCmd->startSlot, pCmd->numVBs, ppVBs, pStrides, pOffsets); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.IaSetIndexBuffer(Data.pBuffer, Data.format, Data.offset); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { SRV* const* ppSRVs = nullptr; auto& Data = 
GetCommandDataVariableSize(pCommandData, &BatchedContext::CmdSetShaderResources::numSRVs, ppSRVs); switch (Data.stage) { case e_VS: ImmCtx.SetShaderResources(Data.startSlot, Data.numSRVs, ppSRVs); break; case e_PS: ImmCtx.SetShaderResources(Data.startSlot, Data.numSRVs, ppSRVs); break; case e_GS: ImmCtx.SetShaderResources(Data.startSlot, Data.numSRVs, ppSRVs); break; case e_HS: ImmCtx.SetShaderResources(Data.startSlot, Data.numSRVs, ppSRVs); break; case e_DS: ImmCtx.SetShaderResources(Data.startSlot, Data.numSRVs, ppSRVs); break; case e_CS: ImmCtx.SetShaderResources(Data.startSlot, Data.numSRVs, ppSRVs); break; } } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { Sampler* const* ppSamplers = nullptr; auto& Data = GetCommandDataVariableSize(pCommandData, &BatchedContext::CmdSetSamplers::numSamplers, ppSamplers); switch (Data.stage) { case e_VS: ImmCtx.SetSamplers(Data.startSlot, Data.numSamplers, ppSamplers); break; case e_PS: ImmCtx.SetSamplers(Data.startSlot, Data.numSamplers, ppSamplers); break; case e_GS: ImmCtx.SetSamplers(Data.startSlot, Data.numSamplers, ppSamplers); break; case e_HS: ImmCtx.SetSamplers(Data.startSlot, Data.numSamplers, ppSamplers); break; case e_DS: ImmCtx.SetSamplers(Data.startSlot, Data.numSamplers, ppSamplers); break; case e_CS: ImmCtx.SetSamplers(Data.startSlot, Data.numSamplers, ppSamplers); break; } } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { struct Temp { BatchedContext::CmdSetConstantBuffers Cmd; Resource* pFirstCB; } const* pTemp = reinterpret_cast(pCommandData); BatchedContext::CmdSetConstantBuffers const* pCmd = &pTemp->Cmd; auto ppCBs = &pTemp->pFirstCB; // Ptr guaranteed to be aligned because alignof(UINT) <= alignof(Resource*) auto pFirstConstant = reinterpret_cast(ppCBs + pCmd->numCBs); auto pNumConstants = pFirstConstant + pCmd->numCBs; // Align pointer to next command pCommandData = 
BatchedContext::AlignPtr(pNumConstants + pCmd->numCBs); switch (pCmd->stage) { case e_VS: ImmCtx.SetConstantBuffers(pCmd->startSlot, pCmd->numCBs, ppCBs, pFirstConstant, pNumConstants); break; case e_PS: ImmCtx.SetConstantBuffers(pCmd->startSlot, pCmd->numCBs, ppCBs, pFirstConstant, pNumConstants); break; case e_GS: ImmCtx.SetConstantBuffers(pCmd->startSlot, pCmd->numCBs, ppCBs, pFirstConstant, pNumConstants); break; case e_HS: ImmCtx.SetConstantBuffers(pCmd->startSlot, pCmd->numCBs, ppCBs, pFirstConstant, pNumConstants); break; case e_DS: ImmCtx.SetConstantBuffers(pCmd->startSlot, pCmd->numCBs, ppCBs, pFirstConstant, pNumConstants); break; case e_CS: ImmCtx.SetConstantBuffers(pCmd->startSlot, pCmd->numCBs, ppCBs, pFirstConstant, pNumConstants); break; } } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { Resource* const* ppCBs = nullptr; auto& Data = GetCommandDataVariableSize(pCommandData, &BatchedContext::CmdSetConstantBuffersNullOffsetSize::numCBs, ppCBs); switch (Data.stage) { case e_VS: ImmCtx.SetConstantBuffers(Data.startSlot, Data.numCBs, ppCBs, nullptr, nullptr); break; case e_PS: ImmCtx.SetConstantBuffers(Data.startSlot, Data.numCBs, ppCBs, nullptr, nullptr); break; case e_GS: ImmCtx.SetConstantBuffers(Data.startSlot, Data.numCBs, ppCBs, nullptr, nullptr); break; case e_HS: ImmCtx.SetConstantBuffers(Data.startSlot, Data.numCBs, ppCBs, nullptr, nullptr); break; case e_DS: ImmCtx.SetConstantBuffers(Data.startSlot, Data.numCBs, ppCBs, nullptr, nullptr); break; case e_CS: ImmCtx.SetConstantBuffers(Data.startSlot, Data.numCBs, ppCBs, nullptr, nullptr); break; } } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.SoSetTargets(4, 0, Data.pBuffers, Data.offsets); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { 
auto& Data = GetCommandData(pCommandData); ImmCtx.OMSetRenderTargets(Data.pRTVs, 8, Data.pDSV); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); if (Data.graphics) { ImmCtx.OMSetUnorderedAccessViews(Data.slot, 1, &Data.pUAV, &Data.initialCount); } else { ImmCtx.CsSetUnorderedAccessViews(Data.slot, 1, &Data.pUAV, &Data.initialCount); } } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.OMSetStencilRef(Data.ref); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.OMSetBlendFactor(Data.factor); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.SetViewport(Data.slot, &Data.viewport); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.SetNumViewports(Data.num); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.SetScissorRect(Data.slot, &Data.rect); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.SetNumScissorRects(Data.num); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.SetScissorRectEnable(Data.enable); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { D3D12_RECT const* 
pRects = nullptr; auto& Data = GetCommandDataVariableSize(pCommandData, &BatchedContext::CmdClearRenderTargetView::numRects, pRects); ImmCtx.ClearRenderTargetView(Data.pView, Data.color, Data.numRects, pRects); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { D3D12_RECT const* pRects = nullptr; auto& Data = GetCommandDataVariableSize(pCommandData, &BatchedContext::CmdClearDepthStencilView::numRects, pRects); ImmCtx.ClearDepthStencilView(Data.pView, Data.flags, Data.depth, Data.stencil, Data.numRects, pRects); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { D3D12_RECT const* pRects = nullptr; auto& Data = GetCommandDataVariableSize(pCommandData, &BatchedContext::CmdClearUnorderedAccessViewUint::numRects, pRects); ImmCtx.ClearUnorderedAccessViewUint(Data.pView, Data.color, Data.numRects, pRects); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { D3D12_RECT const* pRects = nullptr; auto& Data = GetCommandDataVariableSize(pCommandData, &BatchedContext::CmdClearUnorderedAccessViewFloat::numRects, pRects); ImmCtx.ClearUnorderedAccessViewFloat(Data.pView, Data.color, Data.numRects, pRects); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { D3D12_RECT const* pRects = nullptr; auto& Data = GetCommandDataVariableSize(pCommandData, &BatchedContext::CmdClearVideoDecoderOutputView::numRects, pRects); ImmCtx.ClearVideoDecoderOutputView(Data.pView, Data.color, Data.numRects, pRects); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { D3D12_RECT const* pRects = nullptr; auto& Data = GetCommandDataVariableSize(pCommandData, &BatchedContext::CmdClearVideoProcessorInputView::numRects, pRects); ImmCtx.ClearVideoProcessorInputView(Data.pView, 
Data.color, Data.numRects, pRects); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { D3D12_RECT const* pRects = nullptr; auto& Data = GetCommandDataVariableSize(pCommandData, &BatchedContext::CmdClearVideoProcessorOutputView::numRects, pRects); ImmCtx.ClearVideoProcessorOutputView(Data.pView, Data.color, Data.numRects, pRects); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { D3D12_RECT const* pRects = nullptr; auto& Data = GetCommandDataVariableSize(pCommandData, &BatchedContext::CmdDiscardView::numRects, pRects); ImmCtx.DiscardView(Data.pView, pRects, Data.numRects); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { D3D12_RECT const* pRects = nullptr; auto& Data = GetCommandDataVariableSize(pCommandData, &BatchedContext::CmdDiscardResource::numRects, pRects); ImmCtx.DiscardResource(Data.pResource, pRects, Data.numRects); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.GenMips(Data.pSRV, Data.filterType); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.FinalizeUpdateSubresources(Data.pDst, Data.Op, nullptr); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.FinalizeUpdateSubresources(Data.pDst, Data.Op.Base, Data.Op.LocalPlacementDescs); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.Rename(Data.pResource, Data.pRenameResource); } }; template <> struct CommandDispatcher { static void 
Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.RenameViaCopy(Data.pResource, Data.pRenameResource, Data.dirtyPlaneMask); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.QueryBegin(Data.pQuery); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.QueryEnd(Data.pQuery); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.SetPredication(Data.pPredicate, Data.Value); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.ResourceCopy(Data.pDst, Data.pSrc); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.ResourceResolveSubresource(Data.pDst, Data.DstSubresource, Data.pSrc, Data.SrcSubresource, Data.Format); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.ResourceCopyRegion(Data.pDst, Data.DstSubresource, Data.DstX, Data.DstY, Data.DstZ, Data.pSrc, Data.SrcSubresource, &Data.SrcBox); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.SetResourceMinLOD(Data.pResource, Data.MinLOD); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.CopyStructureCount(Data.pDst, Data.DstOffset, 
Data.pSrc); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { Resource* const* ppResources = nullptr; auto& Data = GetCommandDataVariableSize(pCommandData, &BatchedContext::CmdRotateResourceIdentities::NumResources, ppResources); ImmCtx.RotateResourceIdentities(ppResources, Data.NumResources); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto pData = reinterpret_cast(pCommandData); const void* pExtensionData = BatchedContext::AlignPtr(pData + 1); pCommandData = reinterpret_cast(pExtensionData) + pData->DataSize; pData->pExt->Dispatch(ImmCtx, pExtensionData, pData->DataSize); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.SetHardwareProtection(Data.pResource, Data.Value); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.SetHardwareProtectionState(Data.State); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { (void)GetCommandData(pCommandData); ImmCtx.ClearState(); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { struct Temp { BatchedContext::CmdUpdateTileMappings Cmd; D3D12_TILED_RESOURCE_COORDINATE Coords; } const* pTemp = reinterpret_cast(pCommandData); static_assert(alignof(D3D12_TILED_RESOURCE_COORDINATE) == alignof(UINT)); static_assert(alignof(D3D12_TILE_REGION_SIZE) == alignof(UINT)); BatchedContext::CmdUpdateTileMappings const* pCmd = &pTemp->Cmd; // Note: Memory for all arrays is unconditionally allocated, even if null pointers are provided, for pointer math simplicity. 
D3D12_TILED_RESOURCE_COORDINATE const* pCoords = &pTemp->Coords; auto pRegions = reinterpret_cast(pCoords + pCmd->NumTiledResourceRegions); auto pRangeFlags = reinterpret_cast(pRegions + pCmd->NumTiledResourceRegions); auto pTilePoolStartOffsets = reinterpret_cast(pRangeFlags + pCmd->NumRanges); auto pRangeTileCounts = pTilePoolStartOffsets + pCmd->NumRanges; pCommandData = BatchedContext::AlignPtr(pRangeTileCounts + pCmd->NumRanges); ImmCtx.UpdateTileMappings(pCmd->pTiledResource, pCmd->NumTiledResourceRegions, pCoords, pCmd->bTiledResourceRegionSizesPresent ? pRegions : nullptr, pCmd->pTilePool, pCmd->NumRanges, pCmd->bRangeFlagsPresent ? pRangeFlags : nullptr, pCmd->bTilePoolStartOffsetsPresent ? pTilePoolStartOffsets : nullptr, pCmd->bRangeTileCountsPresent ? pRangeTileCounts : nullptr, pCmd->Flags); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.CopyTileMappings(Data.pDstTiledResource, &Data.DstStartCoords, Data.pSrcTiledResource, &Data.SrcStartCoords, &Data.TileRegion, Data.Flags); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.CopyTiles(Data.pResource, &Data.StartCoords, &Data.TileRegion, Data.pBuffer, Data.BufferOffset, Data.Flags); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.TiledResourceBarrier(Data.pBefore, Data.pAfter); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); ImmCtx.ResizeTilePool(Data.pTilePool, Data.NewSize); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext&, const void*& pCommandData) { auto& Data = GetCommandData(pCommandData); // Note: 
The D3D11 runtime also recursively executes nested command lists, so this is probably safe to do. Data.pThis->ProcessBatchImpl(Data.pBatch); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { const wchar_t* name = nullptr; (void)GetCommandDataVariableSize(pCommandData, &BatchedContext::CmdSetMarker::NumChars, name); ImmCtx.SetMarker(name); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { const wchar_t* name = nullptr; (void)GetCommandDataVariableSize(pCommandData, &BatchedContext::CmdBeginEvent::NumChars, name); ImmCtx.BeginEvent(name); } }; template <> struct CommandDispatcher { static void Execute(ImmediateContext& ImmCtx, const void*& pCommandData) { (void)GetCommandData(pCommandData); ImmCtx.EndEvent(); } }; //---------------------------------------------------------------------------------------------------------------------------------- // These structs generate an array consisting of the functions defined above, // in order of their command value, so it can be indexed by value as a dispatch table. template struct DispatchArrayImpl { // Generic case, just grab value from below. // This is recursive, so Rest starts from empty, to N, to (N-1, N), etc, until 0 is the first value. // Then it triggers the base. static constexpr auto& value = DispatchArrayImpl::value; }; template struct DispatchArrayImpl<0, Rest...> { // Base case, define array. static constexpr BatchedContext::DispatcherFunction value[] = { &CommandDispatcher<0>::Execute, &CommandDispatcher::Execute... }; }; // Statics need to be explicitly instantiated. template constexpr BatchedContext::DispatcherFunction DispatchArrayImpl<0, Rest...>::value[]; // Instantiate with the correct number of commands. Note that the array generated is inclusive, not exclusive. 
constexpr auto& DispatchArray = DispatchArrayImpl::value; //---------------------------------------------------------------------------------------------------------------------------------- BatchedContext::BatchedContext(ImmediateContext& ImmCtx, CreationArgs args, Callbacks const& callbacks) : m_ImmCtx(ImmCtx) // WARNING: ImmCtx might not be initialized yet, avoid any access to it during this constructor. , m_CreationArgs(args) , m_DispatchTable(DispatchArray) , m_Callbacks(callbacks) { if (args.SubmitBatchesToWorkerThread) { m_BatchSubmittedSemaphore.m_h = CreateSemaphore(nullptr, 0, c_MaxOutstandingBatches, nullptr); ThrowIfHandleNull(m_BatchSubmittedSemaphore); m_BatchConsumedSemaphore.m_h = CreateSemaphore(nullptr, 0, c_MaxOutstandingBatches, nullptr); ThrowIfHandleNull(m_BatchConsumedSemaphore); m_BatchThread.m_h = CreateThread( nullptr, 0, [](void* pContext) -> DWORD { reinterpret_cast(pContext)->BatchThread(); return 0; }, this, CREATE_SUSPENDED, nullptr); ThrowIfHandleNull(m_BatchThread); ResumeThread(m_BatchThread.m_h); } } //---------------------------------------------------------------------------------------------------------------------------------- BatchedContext::~BatchedContext() { assert(!IsBatchThread()); // Batch producing contexts shouldn't flush on their own. if (!m_CreationArgs.pParentContext) { ProcessBatch(); } if (m_CreationArgs.SubmitBatchesToWorkerThread) { assert(m_NumOutstandingBatches == 0 && m_QueuedBatches.empty()); // When the batch thread wakes up after consuming a semaphore value, and // sees that the queue is empty, it will exit. BOOL value = ReleaseSemaphore(m_BatchSubmittedSemaphore, 1, nullptr); assert(value == TRUE); UNREFERENCED_PARAMETER(value); // Wait for it to exit. 
WaitForSingleObject(m_BatchThread, INFINITE); } } //---------------------------------------------------------------------------------------------------------------------------------- template void BatchedContext::AddToBatch(BatchStorage& CurrentBatch, TCmd const& command) { assert(!IsBatchThread()); static_assert(std::is_trivially_destructible::value, "Destructors don't get called on batched commands."); struct alignas(BatchPrimitive)Temp { UINT CommandValue; TCmd Command; }; if (!CurrentBatch.reserve_contiguous(sizeof(Temp) / sizeof(BatchPrimitive))) { throw std::bad_alloc(); } Temp* pPtr = reinterpret_cast(CurrentBatch.append_contiguous_manually(sizeof(Temp) / sizeof(BatchPrimitive))); pPtr->CommandValue = TCmd::CmdValue; pPtr->Command = command; } //---------------------------------------------------------------------------------------------------------------------------------- template void BatchedContext::AddToBatchVariableSize(TCmd const& command, UINT NumEntries, TEntry const* entries) { assert(!IsBatchThread()); auto Lock = m_RecordingLock.TakeLock(); static_assert(std::is_trivially_destructible::value, "Destructors don't get called on batched commands."); struct Temp { UINT CommandValue; TCmd Command; TEntry FirstEntry; }; const size_t TotalSizeInBytes = Align(offsetof(Temp, FirstEntry) + sizeof(TEntry) * (NumEntries), sizeof(BatchPrimitive)); const size_t TotalSizeInElements = TotalSizeInBytes / sizeof(BatchPrimitive); if (!m_CurrentBatch.reserve_contiguous(TotalSizeInElements)) { throw std::bad_alloc(); } Temp* pPtr = reinterpret_cast(m_CurrentBatch.append_contiguous_manually(TotalSizeInElements)); pPtr->CommandValue = TCmd::CmdValue; pPtr->Command = command; std::copy(entries, entries + NumEntries, &pPtr->FirstEntry); ++m_CurrentCommandCount; SubmitBatchIfIdle(); } //---------------------------------------------------------------------------------------------------------------------------------- template void BatchedContext::EmplaceInBatch(Args&&... 
args) { assert(!IsBatchThread()); auto Lock = m_RecordingLock.TakeLock(); static_assert(std::is_trivially_destructible::value, "Destructors don't get called on batched commands."); const size_t CommandSize = TCmd::GetCommandSize(std::forward(args)...); // GetCommandSize must ensure size is aligned correctly. assert(CommandSize % sizeof(BatchPrimitive) == 0); if (!m_CurrentBatch.reserve_contiguous(CommandSize / sizeof(BatchPrimitive))) { throw std::bad_alloc(); } void* pPtr = m_CurrentBatch.append_contiguous_manually(CommandSize / sizeof(BatchPrimitive)); new (pPtr) TCmd(std::forward(args)...); ++m_CurrentCommandCount; SubmitBatchIfIdle(); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::SetPipelineState(PipelineState* pPSO) { AddToBatch(CmdSetPipelineState{ pPSO }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::DrawInstanced(UINT countPerInstance, UINT instanceCount, UINT vertexStart, UINT instanceStart) { AddToBatch(CmdDrawInstanced{ countPerInstance, instanceCount, vertexStart, instanceStart }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::DrawIndexedInstanced(UINT countPerInstance, UINT instanceCount, UINT indexStart, INT vertexStart, UINT instanceStart) { AddToBatch(CmdDrawIndexedInstanced{ countPerInstance, instanceCount, indexStart, vertexStart, instanceStart }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::DrawAuto() { AddToBatch(CmdDrawAuto{}); } 
//---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::DrawIndexedInstancedIndirect(Resource* pBuffer, UINT offset) { AddToBatch(CmdDrawIndexedInstancedIndirect{ pBuffer, offset }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::DrawInstancedIndirect(Resource* pBuffer, UINT offset) { AddToBatch(CmdDrawInstancedIndirect{ pBuffer, offset }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::Dispatch(UINT x, UINT y, UINT z) { AddToBatch(CmdDispatch{ x, y, z }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::DispatchIndirect(Resource* pBuffer, UINT offset) { AddToBatch(CmdDispatchIndirect{ pBuffer, offset }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::IaSetTopology(D3D12_PRIMITIVE_TOPOLOGY topology) { AddToBatch(CmdSetTopology{ topology }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::IaSetVertexBuffers(UINT StartSlot, __in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT) UINT NumBuffers, Resource** pVBs, const UINT*pStrides, const UINT* pOffsets) { EmplaceInBatch(StartSlot, NumBuffers, pVBs, pStrides, pOffsets); } BatchedContext::CmdSetVertexBuffers::CmdSetVertexBuffers(UINT _startSlot, UINT _numVBs, Resource* const* _ppVBs, UINT const* _pStrides, UINT const* _pOffsets) : startSlot(_startSlot) , numVBs(_numVBs) { struct Temp { 
BatchedContext::CmdSetVertexBuffers Cmd; Resource* pFirstVB; } *pTemp = reinterpret_cast(this); auto ppVBs = &pTemp->pFirstVB; // Ptr guaranteed to be aligned because alignof(UINT) <= alignof(Resource*) auto pStrides = reinterpret_cast(ppVBs + numVBs); auto pOffsets = pStrides + numVBs; std::copy(_ppVBs, _ppVBs + numVBs, ppVBs); std::copy(_pStrides, _pStrides + numVBs, pStrides); std::copy(_pOffsets, _pOffsets + numVBs, pOffsets); } size_t BatchedContext::CmdSetVertexBuffers::GetCommandSize(UINT, UINT _numVBs, Resource* const*, UINT const*, UINT const*) { struct Temp { BatchedContext::CmdSetVertexBuffers Cmd; Resource* pFirstVB; }; return Align(offsetof(Temp, pFirstVB) + (sizeof(Resource*) + sizeof(UINT) * 2) * _numVBs, sizeof(BatchPrimitive)); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::IaSetIndexBuffer(Resource* pBuffer, DXGI_FORMAT format, UINT offset) { AddToBatch(CmdSetIndexBuffer{ pBuffer, format, offset }); } //---------------------------------------------------------------------------------------------------------------------------------- template void TRANSLATION_API BatchedContext::SetShaderResources(UINT StartSlot, __in_range(0, D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT) UINT NumSRVs, SRV* const* ppSRVs) { AddToBatchVariableSize(CmdSetShaderResources{ ShaderStage, StartSlot, NumSRVs }, NumSRVs, ppSRVs); } template void TRANSLATION_API BatchedContext::SetShaderResources(UINT, UINT, SRV* const*); template void TRANSLATION_API BatchedContext::SetShaderResources(UINT, UINT, SRV* const*); template void TRANSLATION_API BatchedContext::SetShaderResources(UINT, UINT, SRV* const*); template void TRANSLATION_API BatchedContext::SetShaderResources(UINT, UINT, SRV* const*); template void TRANSLATION_API BatchedContext::SetShaderResources(UINT, UINT, SRV* const*); template void TRANSLATION_API BatchedContext::SetShaderResources(UINT, 
UINT, SRV* const*); //---------------------------------------------------------------------------------------------------------------------------------- template void TRANSLATION_API BatchedContext::SetSamplers(UINT StartSlot, __in_range(0, D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT) UINT NumSamplers, Sampler** ppSamplers) { AddToBatchVariableSize(CmdSetSamplers{ ShaderStage, StartSlot, NumSamplers }, NumSamplers, ppSamplers ); } template void TRANSLATION_API BatchedContext::SetSamplers(UINT, UINT, Sampler**); template void TRANSLATION_API BatchedContext::SetSamplers(UINT, UINT, Sampler**); template void TRANSLATION_API BatchedContext::SetSamplers(UINT, UINT, Sampler**); template void TRANSLATION_API BatchedContext::SetSamplers(UINT, UINT, Sampler**); template void TRANSLATION_API BatchedContext::SetSamplers(UINT, UINT, Sampler**); template void TRANSLATION_API BatchedContext::SetSamplers(UINT, UINT, Sampler**); //---------------------------------------------------------------------------------------------------------------------------------- template void TRANSLATION_API BatchedContext::SetConstantBuffers(UINT StartSlot, __in_range(0, D3D11_COMMONSHADER_CONSTANT_BUFFER_HW_SLOT_COUNT) UINT NumBuffers, Resource** ppCBs, __in_ecount_opt(NumBuffers) CONST UINT* pFirstConstant, __in_ecount_opt(NumBuffers) CONST UINT* pNumConstants) { if (pFirstConstant) { EmplaceInBatch(ShaderStage, StartSlot, NumBuffers, ppCBs, pFirstConstant, pNumConstants); } else { AddToBatchVariableSize(CmdSetConstantBuffersNullOffsetSize{ ShaderStage, StartSlot, NumBuffers }, NumBuffers, ppCBs); } } template void TRANSLATION_API BatchedContext::SetConstantBuffers(UINT, UINT, Resource** ppCBs, CONST UINT* pFirstConstant, CONST UINT* pNumConstants); template void TRANSLATION_API BatchedContext::SetConstantBuffers(UINT, UINT, Resource** ppCBs, CONST UINT* pFirstConstant, CONST UINT* pNumConstants); template void TRANSLATION_API BatchedContext::SetConstantBuffers(UINT, UINT, Resource** ppCBs, CONST UINT* 
pFirstConstant, CONST UINT* pNumConstants); template void TRANSLATION_API BatchedContext::SetConstantBuffers(UINT, UINT, Resource** ppCBs, CONST UINT* pFirstConstant, CONST UINT* pNumConstants); template void TRANSLATION_API BatchedContext::SetConstantBuffers(UINT, UINT, Resource** ppCBs, CONST UINT* pFirstConstant, CONST UINT* pNumConstants); template void TRANSLATION_API BatchedContext::SetConstantBuffers(UINT, UINT, Resource** ppCBs, CONST UINT* pFirstConstant, CONST UINT* pNumConstants); BatchedContext::CmdSetConstantBuffers::CmdSetConstantBuffers(EShaderStage _stage, UINT _startSlot, UINT _numCBs, Resource* const* _ppCBs, UINT const* _pFirstConstant, UINT const* _pNumConstants) : stage(_stage) , startSlot(_startSlot) , numCBs(_numCBs) { struct Temp { BatchedContext::CmdSetConstantBuffers Cmd; Resource* pFirstCB; } *pTemp = reinterpret_cast(this); auto ppCBs = &pTemp->pFirstCB; // Ptr guaranteed to be aligned because alignof(UINT) <= alignof(Resource*) auto pFirstConstant = reinterpret_cast(ppCBs + numCBs); auto pNumConstants = pFirstConstant + numCBs; std::copy(_ppCBs, _ppCBs + numCBs, ppCBs); std::copy(_pFirstConstant, _pFirstConstant + numCBs, pFirstConstant); std::copy(_pNumConstants, _pNumConstants + numCBs, pNumConstants); } size_t BatchedContext::CmdSetConstantBuffers::GetCommandSize(EShaderStage, UINT, UINT numCBs, Resource* const*, UINT const*, UINT const*) { struct Temp { BatchedContext::CmdSetConstantBuffers Cmd; Resource* pFirstCB; }; return Align(offsetof(Temp, pFirstCB) + (sizeof(Resource*) + sizeof(UINT) * 2) * numCBs, sizeof(BatchPrimitive)); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::SoSetTargets(_In_range_(0, 4) UINT NumTargets, _In_range_(0, 4) UINT, _In_reads_(NumTargets) Resource** pBuffers, _In_reads_(NumTargets) const UINT* offsets) { CmdSetSOBuffers command = {}; std::copy(pBuffers, pBuffers + NumTargets, 
command.pBuffers); std::copy(offsets, offsets + NumTargets, command.offsets); AddToBatch(command); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::OMSetRenderTargets(__in_ecount(NumRTVs) RTV** ppRTVs, __in_range(0, 8) UINT NumRTVs, __in_opt DSV *pDSV, __in_ecount(NumUavs) UAV** ppUavs, CONST UINT* pInitialCounts, UINT UAVStartSlot, __in_range(0, D3D11_1_UAV_SLOT_COUNT) UINT NumUavs) { CmdSetRenderTargets command = {}; std::copy(ppRTVs, ppRTVs + NumRTVs, command.pRTVs); command.pDSV = pDSV; AddToBatch(command); for (UINT i = 0; i < D3D11_1_UAV_SLOT_COUNT; ++i) { UINT slot = i; UINT inputIndex = slot - UAVStartSlot; bool bValidUAVSlot = slot >= NumRTVs && inputIndex < NumUavs; UAV* pUAV = bValidUAVSlot ? ppUavs[inputIndex] : nullptr; if (m_UAVs.UpdateBinding(slot, pUAV) || (pUAV && pInitialCounts[inputIndex] != UINT_MAX)) { AddToBatch(CmdSetUAV{ true, slot, pUAV, pUAV ? pInitialCounts[inputIndex] : UINT_MAX }); } } } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::CsSetUnorderedAccessViews(UINT Start, __in_range(0, D3D11_1_UAV_SLOT_COUNT) UINT NumViews, __in_ecount(NumViews) UAV** ppUAVs, __in_ecount(NumViews) CONST UINT* pInitialCounts) { for (UINT i = 0; i < NumViews; ++i) { AddToBatch(CmdSetUAV{ false, i + Start, ppUAVs[i], pInitialCounts ? 
pInitialCounts[i] : UINT_MAX }); } } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::OMSetStencilRef(UINT StencilRef) { AddToBatch(CmdSetStencilRef{ StencilRef }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::OMSetBlendFactor(const FLOAT BlendFactor[4]) { AddToBatch(CmdSetBlendFactor{ {BlendFactor[0], BlendFactor[1], BlendFactor[2], BlendFactor[3]} }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::SetViewports(UINT NumViewports, const D3D12_VIEWPORT* pViewports) { for (UINT i = 0; i < NumViewports; ++i) { if (memcmp(&pViewports[i], &m_Viewports[i], sizeof(D3D12_VIEWPORT)) != 0) { m_Viewports[i] = pViewports[i]; AddToBatch(CmdSetViewport{ i, pViewports[i] }); } } if (m_NumViewports != NumViewports) { AddToBatch(CmdSetNumViewports{ NumViewports }); m_NumViewports = NumViewports; } } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::SetScissorRects(UINT NumRects, const D3D12_RECT* pRects) { for (UINT i = 0; i < NumRects; ++i) { if (memcmp(&pRects[i], &m_Scissors[i], sizeof(D3D12_RECT)) != 0) { m_Scissors[i] = pRects[i]; AddToBatch(CmdSetScissorRect{ i, pRects[i] }); } } if (m_NumScissors != NumRects) { AddToBatch(CmdSetNumScissorRects{ NumRects }); m_NumScissors = NumRects; } } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::SetScissorRectEnable(BOOL enable) { AddToBatch(CmdSetScissorEnable{ enable != 0 }); } 
//---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::ClearRenderTargetView(RTV* pRTV, CONST FLOAT Color[4], UINT NumRects, const D3D12_RECT *pRects) { AddToBatchVariableSize(CmdClearRenderTargetView{ pRTV, Color, NumRects }, NumRects, pRects); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::ClearDepthStencilView(DSV* pDSV, UINT Flags, FLOAT Depth, UINT8 Stencil, UINT NumRects, const D3D12_RECT *pRects) { AddToBatchVariableSize(CmdClearDepthStencilView{ pDSV, Flags, Depth, Stencil, NumRects }, NumRects, pRects); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::ClearUnorderedAccessViewUint(UAV* pUAV, CONST UINT Color[4], UINT NumRects, const D3D12_RECT *pRects) { AddToBatchVariableSize(CmdClearUnorderedAccessViewUint{ pUAV, Color, NumRects }, NumRects, pRects); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::ClearUnorderedAccessViewFloat(UAV* pUAV, CONST FLOAT Color[4], UINT NumRects, const D3D12_RECT *pRects) { AddToBatchVariableSize(CmdClearUnorderedAccessViewFloat{ pUAV, Color, NumRects }, NumRects, pRects); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::ClearVideoDecoderOutputView(VDOV* pVDOV, CONST FLOAT Color[4], UINT NumRects, const D3D12_RECT *pRects) { AddToBatchVariableSize(CmdClearVideoDecoderOutputView{ pVDOV, Color, NumRects }, NumRects, pRects); } 
//---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::ClearVideoProcessorInputView(VPIV* pVPIV, CONST FLOAT Color[4], UINT NumRects, const D3D12_RECT *pRects) { AddToBatchVariableSize(CmdClearVideoProcessorInputView{ pVPIV, Color, NumRects }, NumRects, pRects); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::ClearVideoProcessorOutputView(VPOV* vPOV, CONST FLOAT Color[4], UINT NumRects, const D3D12_RECT *pRects) { AddToBatchVariableSize(CmdClearVideoProcessorOutputView{ vPOV, Color, NumRects }, NumRects, pRects); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::DiscardView(ViewBase* pView, const D3D12_RECT* pRects, UINT NumRects) { AddToBatchVariableSize(CmdDiscardView{ pView, NumRects }, NumRects, pRects); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::DiscardResource(Resource* pResource, const D3D12_RECT* pRects, UINT NumRects) { AddToBatchVariableSize(CmdDiscardResource{ pResource, NumRects }, NumRects, pRects); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::GenMips(SRV* pSRV, D3D12_FILTER_TYPE FilterType) { AddToBatch(CmdGenMips{ pSRV, FilterType }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::ResourceUpdateSubresourceUP(Resource* pResource, UINT DstSubresource, _In_opt_ const D3D12_BOX* pDstBox, _In_ const VOID* pMem, UINT SrcPitch, 
UINT SrcDepth) { D3D11_SUBRESOURCE_DATA SubresourceDesc = { pMem, SrcPitch, SrcDepth }; UINT8 MipLevel, PlaneSlice; UINT16 ArraySlice; DecomposeSubresourceIdxExtended(DstSubresource, pResource->AppDesc()->MipLevels(), pResource->AppDesc()->ArraySize(), MipLevel, ArraySlice, PlaneSlice); const ImmediateContext::CPrepareUpdateSubresourcesHelper PrepareHelper( *pResource, CSubresourceSubset(1, 1, pResource->SubresourceMultiplier(), MipLevel, ArraySlice, PlaneSlice), &SubresourceDesc, pDstBox, ImmediateContext::UpdateSubresourcesFlags::ScenarioBatchedContext, nullptr, 0, m_ImmCtx); if (PrepareHelper.FinalizeNeeded) // Might be a no-op due to box. { if (PrepareHelper.bUseLocalPlacement) { AddToBatch(CmdFinalizeUpdateSubresourcesWithLocalPlacement{ pResource, PrepareHelper.PreparedStorage }); } else { AddToBatch(CmdFinalizeUpdateSubresources{ pResource, PrepareHelper.PreparedStorage.Base }); } } } //---------------------------------------------------------------------------------------------------------------------------------- _Use_decl_annotations_ void TRANSLATION_API BatchedContext::UploadInitialData(Resource* pDst, D3D12TranslationLayer::CSubresourceSubset const& Subresources, const D3D11_SUBRESOURCE_DATA* pSrcData, const D3D12_BOX* pDstBox) { const ImmediateContext::CPrepareUpdateSubresourcesHelper PrepareHelper( *pDst, Subresources, pSrcData, pDstBox, ImmediateContext::UpdateSubresourcesFlags::ScenarioInitialData, nullptr, 0, m_ImmCtx); if (PrepareHelper.FinalizeNeeded) // Might have been written directly to the destination. 
{ auto AddToBatchImpl = [&]() { if (PrepareHelper.bUseLocalPlacement) { AddToBatch(CmdFinalizeUpdateSubresourcesWithLocalPlacement{ pDst, PrepareHelper.PreparedStorage }); } else { AddToBatch(CmdFinalizeUpdateSubresources{ pDst, PrepareHelper.PreparedStorage.Base }); } }; AddToBatchImpl(); } } //---------------------------------------------------------------------------------------------------------------------------------- // Make sure the batch is completed, then call into the immediate context to wait for the GPU. bool TRANSLATION_API BatchedContext::MapUnderlyingSynchronize(BatchedResource* pResource, UINT Subresource, MAP_TYPE MapType, bool DoNotWait, _In_opt_ const D3D12_BOX *pReadWriteRange, MappedSubresource* pMappedSubresource) { bool result = FlushBatchAndGetImmediateContext().MapUnderlyingSynchronize(pResource->m_pResource, Subresource, MapType, DoNotWait, pReadWriteRange, pMappedSubresource); return result; } //---------------------------------------------------------------------------------------------------------------------------------- // Make sure the batch is completed, then call into the immediate context to figure out how to do the map. bool TRANSLATION_API BatchedContext::MapDefault(BatchedResource* pResource, UINT Subresource, MAP_TYPE MapType, bool DoNotWait, _In_opt_ const D3D12_BOX *pReadWriteRange, MappedSubresource* pMappedSubresource) { bool result = FlushBatchAndGetImmediateContext().MapDefault(pResource->m_pResource, Subresource, MapType, DoNotWait, pReadWriteRange, pMappedSubresource); return result; } //---------------------------------------------------------------------------------------------------------------------------------- // Call thread-safe immediate context methods to acquire a mappable buffer, and queue a rename operation. 
// If anything on that path throws a _com_error carrying E_OUTOFMEMORY, falls back to flushing the batch
// and doing a MapDiscardBuffer on the immediate context. Any other error is rethrown.
bool TRANSLATION_API BatchedContext::RenameAndMapBuffer(BatchedResource* pResource, MappedSubresource* pMappedSubresource)
{
    try
    {
        auto& ImmCtx = GetImmediateContextNoFlush();

        // The cookie owns the rename target until it is detached into the post-batch cleanup below.
        SafeRenameResourceCookie RenameCookie(ImmCtx.CreateRenameCookie(pResource->m_pResource, ResourceAllocationContext::FreeThread));
        Resource* pRenameTarget = RenameCookie.Get();
        auto& LastRenamed = pResource->m_LastRenamedResource;
        LastRenamed = pRenameTarget->GetIdentity()->m_suballocation;

        // Map with an empty (0, 0) read range.
        D3D12_RANGE ReadRange = CD3DX12_RANGE(0, 0);
        void* pData = nullptr;
        ThrowFailure(LastRenamed.Map(0, &ReadRange, &pData));

        // Queue the rename, then schedule deletion of the cookie for after the batch has run.
        AddToBatch(CmdRename{ pResource->m_pResource, RenameCookie.Get() });
        AddPostBatchFunction([cleanup = RenameCookie.Detach(), &immCtx = GetImmediateContextNoFlush()](){ immCtx.DeleteRenameCookie(cleanup); });

        pMappedSubresource->pData = pData;
        pMappedSubresource->RowPitch = pResource->m_pResource->GetSubresourcePlacement(0).Footprint.RowPitch;
        pMappedSubresource->DepthPitch = pResource->m_pResource->DepthPitch(0);
        return true;
    }
    catch (_com_error& hrEx)
    {
        if (hrEx.Error() != E_OUTOFMEMORY)
        {
            throw;
        }

        // Out of memory: flush and let the immediate context perform the discard map instead.
        // The result of MapDiscardBuffer is deliberately ignored.
        (void)FlushBatchAndGetImmediateContext().MapDiscardBuffer(pResource->m_pResource, 0, MAP_TYPE_WRITE_DISCARD, false, nullptr, pMappedSubresource);
        pResource->m_LastRenamedResource = pResource->m_pResource->GetIdentity()->m_suballocation;
        return true;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Call thread-safe immediate context methods to acquire a mappable buffer - don't queue anything yet.
// YV12-emulated resources are instead forwarded to MapDynamicTexture on the flushed immediate context.
// For all other resources, the first mapped plane creates the rename target and parks its cookie in
// m_PendingRenameViaCopyCookie. Every call bumps the plane's map refcount and marks the plane dirty.
bool TRANSLATION_API BatchedContext::MapForRenameViaCopy(BatchedResource* pResource, UINT Subresource, MappedSubresource* pMappedSubresource)
{
    auto& pUnderlying = pResource->m_pResource;

    if (pUnderlying->GetFormatEmulation() == FormatEmulation::YV12)
    {
        return FlushBatchAndGetImmediateContext().MapDynamicTexture(pUnderlying, Subresource, MAP_TYPE_READWRITE, true, nullptr, pMappedSubresource);
    }

    assert (pUnderlying->GetFormatEmulation() == FormatEmulation::None);

    UINT MipIndex, PlaneIndex, ArrayIndex;
    pUnderlying->DecomposeSubresource(Subresource, MipIndex, ArrayIndex, PlaneIndex);

    auto& ImmCtx = GetImmediateContextNoFlush();
    auto& PlaneData = pResource->m_DynamicTexturePlaneData;

    if (!PlaneData.AnyPlaneMapped())
    {
        // No plane is mapped yet: acquire the rename target and hold its cookie until the final unmap.
        SafeRenameResourceCookie RenameCookie(ImmCtx.CreateRenameCookie(pUnderlying, ResourceAllocationContext::FreeThread));
        pResource->m_LastRenamedResource = RenameCookie.Get()->GetIdentity()->m_suballocation;
        pResource->m_PendingRenameViaCopyCookie.Reset(RenameCookie.Detach());
    }

    ++PlaneData.m_MappedPlaneRefCount[PlaneIndex];
    PlaneData.m_DirtyPlaneMask |= (1 << PlaneIndex);

    // Map with an empty (0, 0) read range.
    D3D12_RANGE ReadRange = CD3DX12_RANGE(0, 0);
    void* pData = nullptr;
    ThrowFailure(pResource->m_LastRenamedResource.Map(0, &ReadRange, &pData));

    // The returned pointer is offset by the subresource's placement relative to subresource 0.
    const auto OffsetFromFirstSubresource =
        pUnderlying->GetSubresourcePlacement(Subresource).Offset - pUnderlying->GetSubresourcePlacement(0).Offset;
    pMappedSubresource->pData = static_cast<BYTE*>(pData) + OffsetFromFirstSubresource;
    pMappedSubresource->RowPitch = pUnderlying->GetSubresourcePlacement(Subresource).Footprint.RowPitch;
    pMappedSubresource->DepthPitch = pUnderlying->DepthPitch(Subresource);
    return true;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Re-map the last-acquired buffer associated with a resource.
// Maps m_LastRenamedResource with an empty (0, 0) read range and reports the pointer together with the
// row/depth pitch of subresource 0. Throws (via ThrowFailure) if the map fails; otherwise returns true.
bool TRANSLATION_API BatchedContext::MapRenamedBuffer(BatchedResource* pResource, MappedSubresource* pMappedSubresource)
{
    D3D12_RANGE ReadRange = CD3DX12_RANGE(0, 0);
    void* pData = nullptr;
    ThrowFailure(pResource->m_LastRenamedResource.Map(0, &ReadRange, &pData));

    pMappedSubresource->pData = pData;
    pMappedSubresource->RowPitch = pResource->m_pResource->GetSubresourcePlacement(0).Footprint.RowPitch;
    pMappedSubresource->DepthPitch = pResource->m_pResource->DepthPitch(0);
    return true;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Just unmap the renamed buffer.
// The written range is [left, right) of pReadWriteRange when one is supplied, otherwise the whole
// buffer suballocation.
void TRANSLATION_API BatchedContext::UnmapRenamedBuffer(BatchedResource* pResource, _In_opt_ const D3D12_BOX *pReadWriteRange)
{
    // CD3DX12_RANGE takes SIZE_T bounds, so the suballocation size is narrowed explicitly.
    D3D12_RANGE WriteRange = pReadWriteRange ?
        CD3DX12_RANGE(pReadWriteRange->left, pReadWriteRange->right) :
        CD3DX12_RANGE(0, static_cast<SIZE_T>(pResource->m_LastRenamedResource.GetBufferSuballocation().GetSize()));
    pResource->m_LastRenamedResource.Unmap(0, &WriteRange);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Map enforced synchronization, just forward to immediate context.
void TRANSLATION_API BatchedContext::UnmapDefault(BatchedResource* pResource, UINT Subresource, _In_opt_ const D3D12_BOX *pReadWriteRange)
{
    GetImmediateContextNoFlush().UnmapDefault(pResource->m_pResource, Subresource, pReadWriteRange);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Map enforced synchronization, just forward to immediate context.
void TRANSLATION_API BatchedContext::UnmapStaging(BatchedResource* pResource, UINT Subresource, _In_opt_ const D3D12_BOX *pReadWriteRange) { GetImmediateContextNoFlush().UnmapUnderlyingStaging(pResource->m_pResource, Subresource, pReadWriteRange); } //---------------------------------------------------------------------------------------------------------------------------------- // Unmap the buffer and queue a rename-via-copy operation. void TRANSLATION_API BatchedContext::UnmapAndRenameViaCopy(BatchedResource* pResource, UINT Subresource, _In_opt_ const D3D12_BOX *pReadWriteRange) { UINT MipIndex, PlaneIndex, ArrayIndex; pResource->m_pResource->DecomposeSubresource(Subresource, MipIndex, ArrayIndex, PlaneIndex); assert(pResource->m_DynamicTexturePlaneData.m_MappedPlaneRefCount[PlaneIndex] == 1); pResource->m_DynamicTexturePlaneData.m_MappedPlaneRefCount[PlaneIndex]--; if (!pResource->m_DynamicTexturePlaneData.AnyPlaneMapped()) { D3D12_RANGE WriteRange = pReadWriteRange ? CD3DX12_RANGE(pReadWriteRange->left, pReadWriteRange->right) : CD3DX12_RANGE(0, static_cast(pResource->m_LastRenamedResource.GetBufferSuballocation().GetSize())); pResource->m_LastRenamedResource.Unmap(0, &WriteRange); AddToBatch(CmdRenameViaCopy{ pResource->m_pResource, pResource->m_PendingRenameViaCopyCookie.Get(), pResource->m_DynamicTexturePlaneData.m_DirtyPlaneMask }); AddPostBatchFunction([cleanup = pResource->m_PendingRenameViaCopyCookie.Detach(), &immCtx = GetImmediateContextNoFlush()](){ immCtx.DeleteRenameCookie(cleanup); }); pResource->m_DynamicTexturePlaneData = {}; pResource->m_LastRenamedResource.Reset(); } } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::QueryBegin(BatchedQuery* pAsync) { if (pAsync->m_CurrentState == Async::AsyncState::Begun) { QueryEnd(pAsync); } auto Lock = m_RecordingLock.TakeLock(); pAsync->m_BatchReferenceID = m_RecordingBatchID; 
AddToBatch(CmdQueryBegin{ pAsync->GetImmediateNoFlush() }); pAsync->m_CurrentState = Async::AsyncState::Begun; } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::QueryEnd(BatchedQuery* pAsync) { if (pAsync->m_CurrentState == Async::AsyncState::Ended && pAsync->GetImmediateNoFlush()->RequiresBegin()) { QueryBegin(pAsync); } auto Lock = m_RecordingLock.TakeLock(); pAsync->m_BatchReferenceID = m_RecordingBatchID; AddToBatch(CmdQueryEnd{ pAsync->GetImmediateNoFlush() }); pAsync->m_CurrentState = Async::AsyncState::Ended; } //---------------------------------------------------------------------------------------------------------------------------------- template bool BatchedContext::SyncWithBatch(uint64_t& BatchID, bool DoNotFlush, TFunc&& GetImmObjectFenceValues) { { auto RecordingLock = m_RecordingLock.TakeLock(); assert(BatchID <= m_RecordingBatchID); constexpr uint64_t GenerationIDMask = 0xffffffff00000000ull; if ((BatchID & GenerationIDMask) < (m_RecordingBatchID & GenerationIDMask)) { // The batch ID comes from a different "generation" of batches. // Essentially, an object which wants to wait for a specific batch to be done // likely needs to monitor all functions which modify its state. // // If a command list batch is executed, it could modify the object's state // in a way that doesn't update the batch ID to be monitored. // In that case, assume that it could've been modified in the batch currently // being recorded (or the last one if this one's empty) // // Note that each generation starts with ID 1, so this subtraction will never // roll backwards into a previous generation. BatchID = m_CurrentBatch.empty() ? m_RecordingBatchID - 1 : m_RecordingBatchID; } // Not even submitted yet. if (BatchID >= m_RecordingBatchID) { if (!DoNotFlush) { // Submit and request flush to GPU as soon as it's done. 
SubmitBatch(true); } // Theoretically we could avoid this, as the query might actually finish in the time // between here and the checks below, but this fails the conformance tests, so we'll // play it safe and just assume that can't happen. return false; } } auto SubmissionLock = m_SubmissionLock.TakeLock(); if (m_CompletedBatchID < BatchID) { if (!DoNotFlush) { assert(!m_QueuedBatches.empty()); // Make sure it's marked to flush when it's done. auto iter = std::find_if(m_QueuedBatches.begin(), m_QueuedBatches.end(), [&BatchID](std::unique_ptr const& p) { return p->m_BatchID > BatchID; }); assert(iter != m_QueuedBatches.begin()); --iter; // We don't know what command list types to use on this timeline, so just request all. (*iter)->m_FlushRequestedMask |= COMMAND_LIST_TYPE_ALL_MASK; } return false; } UINT64 FenceValues[(UINT)COMMAND_LIST_TYPE::MAX_VALID] = {}; GetImmObjectFenceValues(FenceValues); if (!DoNotFlush) { // Check for flush. UINT FlushMask = 0; for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++) { if (FenceValues[i] != 0 && // Note: Last seen command list IDs might not be completely up-to-date // if immediate context was accessed directly, so we need a conservative >= here, // but after at least attempting to flush once, they will become up-to-date. FenceValues[i] >= m_ImmCtx.GetCommandListIDInterlockedRead((COMMAND_LIST_TYPE)i)) { FlushMask |= (1 << i); } } if (FlushMask != 0) { // Not checking thread idle bit as we're already under the lock. if (m_QueuedBatches.empty()) { m_ImmCtx.PrepForCommandQueueSync(FlushMask); } else { m_QueuedBatches.front()->m_FlushRequestedMask |= FlushMask; } return false; } } for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; ++i) { if (m_ImmCtx.GetCompletedFenceValue((COMMAND_LIST_TYPE)i) < FenceValues[i]) { // Note: Work may not have been flushed to GPU if we were called with DoNotFlush. // Either way, work isn't done. return false; } } // CPU and GPU are done with it. 
return true; } //---------------------------------------------------------------------------------------------------------------------------------- bool TRANSLATION_API BatchedContext::QueryGetData(BatchedQuery* pAsync, void* pData, UINT DataSize, bool DoNotFlush) { // Make sure it's in the right state (might update batch reference) if (pAsync->m_CurrentState == Async::AsyncState::Begun) { QueryEnd(pAsync); } // Check if it's done yet. if (!SyncWithBatch(pAsync->m_BatchReferenceID, DoNotFlush, [pAsync](UINT64* pFenceValues) { auto& EndedCommandListIDs = pAsync->GetImmediateNoFlush()->m_EndedCommandListID; std::copy(EndedCommandListIDs, std::end(EndedCommandListIDs), pFenceValues); })) { return false; } ImmediateContext& ImmCtx = GetImmediateContextNoFlush(); bool bAsyncGetData = m_CreationArgs.SubmitBatchesToWorkerThread; return ImmCtx.QueryGetData(pAsync->GetImmediateNoFlush(), pData, DataSize, DoNotFlush, bAsyncGetData); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::SetPredication(Query* pPredicate, BOOL PredicateValue) { AddToBatch(CmdSetPredication{ pPredicate, PredicateValue }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::ResourceCopy(Resource* pDst, Resource* pSrc) { AddToBatch(CmdResourceCopy{ pDst, pSrc }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::ResourceResolveSubresource(Resource* pDst, UINT DstSubresource, Resource* pSrc, UINT SrcSubresource, DXGI_FORMAT Format) { AddToBatch(CmdResolveSubresource{ pDst, pSrc, DstSubresource, SrcSubresource, Format }); } 
//---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::ResourceCopyRegion(Resource* pDst, UINT DstSubresource, UINT DstX, UINT DstY, UINT DstZ, Resource* pSrc, UINT SrcSubresource, const D3D12_BOX* pSrcBox) { AddToBatch(CmdResourceCopyRegion{ pDst, pSrc, DstSubresource, SrcSubresource, DstX, DstY, DstZ, pSrcBox ? *pSrcBox : m_ImmCtx.GetBoxFromResource(pSrc, SrcSubresource) }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::SetResourceMinLOD(Resource* pResource, FLOAT MinLOD) { AddToBatch(CmdSetResourceMinLOD{ pResource, MinLOD }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::CopyStructureCount(Resource* pDstResource, UINT DstAlignedByteOffset, UAV* pSrcUAV) { AddToBatch(CmdCopyStructureCount{ pDstResource, pSrcUAV, DstAlignedByteOffset }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::RotateResourceIdentities(Resource* const* ppResources, UINT Resources) { AddToBatchVariableSize(CmdRotateResourceIdentities{ Resources }, Resources, ppResources); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::SetHardwareProtection(Resource* pResource, UINT Value) { AddToBatch(CmdSetHardwareProtection{ pResource, Value }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::SetHardwareProtectionState(BOOL state) { AddToBatch(CmdSetHardwareProtectionState{ state 
}); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::BatchExtension(BatchedExtension* pExt, const void* pData, size_t DataSize) { EmplaceInBatch(pExt, pData, DataSize); } BatchedContext::CmdExtension::CmdExtension(BatchedExtension* pExt, const void* pData, size_t DataSize) : pExt(pExt) , DataSize(Align(DataSize, sizeof(BatchPrimitive))) { // Ensure extension data is 64-bit aligned. void* pDataDst = AlignPtr(this + 1); if (pData) { memcpy(pDataDst, pData, DataSize); } } size_t BatchedContext::CmdExtension::GetCommandSize(BatchedExtension*, const void*, size_t DataSize) { return Align(sizeof(BatchedContext::CmdExtension), sizeof(BatchPrimitive)) + Align(DataSize, sizeof(BatchPrimitive)); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::ClearState() { AddToBatch(CmdClearState{}); ClearStateImpl(); } //---------------------------------------------------------------------------------------------------------------------------------- void BatchedContext::ClearStateImpl() { m_UAVs.Clear(); m_NumScissors = 0; m_NumViewports = 0; ZeroMemory(m_Scissors, sizeof(m_Scissors)); ZeroMemory(m_Viewports, sizeof(m_Viewports)); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::UpdateTileMappings( Resource* pTiledResource, UINT NumTiledResourceRegions, _In_reads_(NumTiledResourceRegions) const D3D12_TILED_RESOURCE_COORDINATE* pTiledResourceRegionStartCoords, _In_reads_opt_(NumTiledResourceRegions) const D3D12_TILE_REGION_SIZE* pTiledResourceRegionSizes, Resource* pTilePool, UINT NumRanges, _In_reads_opt_(NumRanges) const ImmediateContext::TILE_RANGE_FLAG* pRangeFlags, _In_reads_opt_(NumRanges) const UINT* 
pTilePoolStartOffsets, _In_reads_opt_(NumRanges) const UINT* pRangeTileCounts, ImmediateContext::TILE_MAPPING_FLAG Flags) { EmplaceInBatch(pTiledResource, NumTiledResourceRegions, pTiledResourceRegionStartCoords, pTiledResourceRegionSizes, pTilePool, NumRanges, pRangeFlags, pTilePoolStartOffsets, pRangeTileCounts, Flags); } BatchedContext::CmdUpdateTileMappings::CmdUpdateTileMappings( Resource* pTiledResource, UINT NumTiledResourceRegions, _In_reads_(NumTiledResourceRegions) const D3D12_TILED_RESOURCE_COORDINATE* pTiledResourceRegionStartCoords, _In_reads_opt_(NumTiledResourceRegions) const D3D12_TILE_REGION_SIZE* pTiledResourceRegionSizes, Resource* pTilePool, UINT NumRanges, _In_reads_opt_(NumRanges) const ImmediateContext::TILE_RANGE_FLAG* pRangeFlags, _In_reads_opt_(NumRanges) const UINT* pTilePoolStartOffsets, _In_reads_opt_(NumRanges) const UINT* pRangeTileCounts, ImmediateContext::TILE_MAPPING_FLAG Flags) : pTiledResource(pTiledResource) , NumTiledResourceRegions(NumTiledResourceRegions) , pTilePool(pTilePool) , NumRanges(NumRanges) , Flags(Flags) , bTiledResourceRegionSizesPresent(pTiledResourceRegionSizes != nullptr) , bRangeFlagsPresent(pRangeFlags != nullptr) , bTilePoolStartOffsetsPresent(pTilePoolStartOffsets != nullptr) , bRangeTileCountsPresent(pRangeTileCounts != nullptr) { struct Temp { BatchedContext::CmdUpdateTileMappings Cmd; D3D12_TILED_RESOURCE_COORDINATE Coords; } *pTemp = reinterpret_cast(this); static_assert(alignof(D3D12_TILED_RESOURCE_COORDINATE) == alignof(UINT)); static_assert(alignof(D3D12_TILE_REGION_SIZE) == alignof(UINT)); // Note: Memory for all arrays is unconditionally allocated, even if null pointers are provided, for pointer math simplicity. 
D3D12_TILED_RESOURCE_COORDINATE* dst_pCoords = &pTemp->Coords; auto dst_pRegions = reinterpret_cast(dst_pCoords + NumTiledResourceRegions); auto dst_pRangeFlags = reinterpret_cast(dst_pRegions + NumTiledResourceRegions); auto dst_pTilePoolStartOffsets = reinterpret_cast(dst_pRangeFlags + NumRanges); auto dst_pRangeTileCounts = dst_pTilePoolStartOffsets + NumRanges; assert( Align(reinterpret_cast(dst_pRangeTileCounts + NumRanges), sizeof(BatchPrimitive)) == reinterpret_cast(this) + GetCommandSize(pTiledResource, NumTiledResourceRegions, pTiledResourceRegionStartCoords, pTiledResourceRegionSizes, pTilePool, NumRanges, pRangeFlags, pTilePoolStartOffsets, pRangeTileCounts, Flags) ); std::copy(pTiledResourceRegionStartCoords, pTiledResourceRegionStartCoords + NumTiledResourceRegions, dst_pCoords); if (bTiledResourceRegionSizesPresent) { std::copy(pTiledResourceRegionSizes, pTiledResourceRegionSizes + NumTiledResourceRegions, dst_pRegions); } if (bRangeFlagsPresent) { std::copy(pRangeFlags, pRangeFlags + NumRanges, dst_pRangeFlags); } if (bTilePoolStartOffsetsPresent) { std::copy(pTilePoolStartOffsets, pTilePoolStartOffsets + NumRanges, dst_pTilePoolStartOffsets); } if (bRangeTileCountsPresent) { std::copy(pRangeTileCounts, pRangeTileCounts + NumRanges, dst_pRangeTileCounts); } } size_t BatchedContext::CmdUpdateTileMappings::GetCommandSize( Resource*, UINT NumTiledResourceRegions, const D3D12_TILED_RESOURCE_COORDINATE*, const D3D12_TILE_REGION_SIZE*, Resource*, UINT NumRanges, const ImmediateContext::TILE_RANGE_FLAG*, const UINT*, const UINT*, ImmediateContext::TILE_MAPPING_FLAG) { struct Temp { BatchedContext::CmdUpdateTileMappings Cmd; D3D12_TILED_RESOURCE_COORDINATE Coords; }; // Note: Memory for all arrays is unconditionally allocated, even if null pointers are provided, for pointer math simplicity. 
return Align(offsetof(Temp, Coords) + (sizeof(D3D12_TILED_RESOURCE_COORDINATE) + sizeof(D3D12_TILE_REGION_SIZE)) * NumTiledResourceRegions + (sizeof(ImmediateContext::TILE_RANGE_FLAG) + sizeof(UINT) * 2) * NumRanges, sizeof(BatchPrimitive)); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::CopyTileMappings(Resource* pDstTiledResource, _In_ const D3D12_TILED_RESOURCE_COORDINATE* pDstStartCoords, Resource* pSrcTiledResource, _In_ const D3D12_TILED_RESOURCE_COORDINATE* pSrcStartCoords, _In_ const D3D12_TILE_REGION_SIZE* pTileRegion, ImmediateContext::TILE_MAPPING_FLAG Flags) { AddToBatch(CmdCopyTileMappings{ pDstTiledResource, pSrcTiledResource, *pDstStartCoords, *pSrcStartCoords, *pTileRegion, Flags }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::CopyTiles(Resource* pResource, _In_ const D3D12_TILED_RESOURCE_COORDINATE* pStartCoords, _In_ const D3D12_TILE_REGION_SIZE* pTileRegion, Resource* pBuffer, UINT64 BufferOffset, ImmediateContext::TILE_COPY_FLAG Flags) { AddToBatch(CmdCopyTiles{ pResource, pBuffer, *pStartCoords, *pTileRegion, BufferOffset, Flags }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API BatchedContext::UpdateTiles(Resource* pResource, _In_ const D3D12_TILED_RESOURCE_COORDINATE* pCoord, _In_ const D3D12_TILE_REGION_SIZE* pRegion, const _In_ VOID* pData, UINT Flags) { UINT DataSize = pRegion->NumTiles * D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; ResourceCreationArgs ResourceArgs = {}; ResourceArgs.m_appDesc = AppResourceDesc(1, 1, 1, 1, 1, 1, DataSize, 1, DXGI_FORMAT_UNKNOWN, 1, 0, RESOURCE_USAGE_STAGING, RESOURCE_CPU_ACCESS_WRITE, (RESOURCE_BIND_FLAGS)0, D3D12_RESOURCE_DIMENSION_BUFFER); 
ResourceArgs.m_heapDesc = CD3DX12_HEAP_DESC(DataSize, GetImmediateContextNoFlush().GetHeapProperties(D3D12_HEAP_TYPE_UPLOAD));
    ResourceArgs.m_desc12 = CD3DX12_RESOURCE_DESC::Buffer(DataSize);
    unique_comptr spResource = Resource::CreateResource(&GetImmediateContextNoFlush(), ResourceArgs, ResourceAllocationContext::FreeThread); // throws
    MappedSubresource Mapped;
    GetImmediateContextNoFlush().MapUnderlying(spResource.get(), 0, MAP_TYPE_WRITE, nullptr, &Mapped);
    memcpy(Mapped.pData, pData, DataSize);
    CD3DX12_RANGE WrittenRange(0, DataSize);
    GetImmediateContextNoFlush().UnmapUnderlyingStaging(spResource.get(), 0, nullptr);
    CopyTiles(pResource, pCoord, pRegion, spResource.get(), 0, (ImmediateContext::TILE_COPY_FLAG)(Flags | ImmediateContext::TILE_COPY_LINEAR_BUFFER_TO_SWIZZLED_TILED_RESOURCE));
    ReleaseResource(spResource.release());
}

//----------------------------------------------------------------------------------------------------------------------------------
// Records a CmdTiledResourceBarrier holding the two resource pointers into the batch.
void TRANSLATION_API BatchedContext::TiledResourceBarrier(Resource* pBefore, Resource* pAfter)
{
    AddToBatch(CmdTiledResourceBarrier{ pBefore, pAfter });
}

//----------------------------------------------------------------------------------------------------------------------------------
// Records a CmdResizeTilePool holding the resource pointer and the requested size into the batch.
void TRANSLATION_API BatchedContext::ResizeTilePool(Resource* pResource, UINT64 NewSize)
{
    AddToBatch(CmdResizeTilePool{ pResource, NewSize });
}

//----------------------------------------------------------------------------------------------------------------------------------
// Records a CmdSetMarker followed by the marker text as variable-size data.
// name is passed directly to wcslen, so it must be a non-null, null-terminated string.
void TRANSLATION_API BatchedContext::SetMarker(const wchar_t* name)
{
    // The count includes the terminating null character.
    UINT NumChars = (UINT)wcslen(name) + 1;
    AddToBatchVariableSize(CmdSetMarker{ NumChars }, NumChars, name);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Records a CmdBeginEvent followed by the event name as variable-size data.
// name is passed directly to wcslen, so it must be a non-null, null-terminated string.
void TRANSLATION_API BatchedContext::BeginEvent(const wchar_t* name)
{
    // The count includes the terminating null character.
    UINT NumChars = (UINT)wcslen(name) + 1;
    AddToBatchVariableSize(CmdBeginEvent{ NumChars }, NumChars, name);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Records an empty CmdEndEvent into the batch.
void TRANSLATION_API BatchedContext::EndEvent()
{
    AddToBatch(CmdEndEvent{ });
}

//----------------------------------------------------------------------------------------------------------------------------------
// Executes every command stored in `batch`: for each segment, the leading UINT of the current command is used
// as an index into m_DispatchTable and that entry is invoked with the immediate context and the command pointer.
// The batch is cleared when this function exits, whether normally or by exception.
void BatchedContext::ProcessBatchWork(BatchStorage& batch)
{
    // Ensure the batch is cleared, even if a batch function errors out
    auto FunctionExit = MakeScopeExit([&batch]() { batch.clear(); });

    for (auto segmentIter = batch.segments_begin(); segmentIter != batch.segments_end(); ++segmentIter)
    {
        const void* pCommandData = segmentIter->begin();
        // NOTE(review): nothing in this loop advances pCommandData; presumably each dispatch entry takes the
        // pointer by reference and steps it past the command it consumed - confirm against the dispatch table.
        while (pCommandData < segmentIter->end())
        {
            // NOTE(review): this listing shows several casts/templates without their angle-bracket arguments
            // (here and below); they look stripped by extraction - confirm against the repository.
            UINT const& CmdValue = *reinterpret_cast(pCommandData);
            ASSUME(CmdValue <= c_LastCommand);
            m_DispatchTable[CmdValue](m_ImmCtx, pCommandData); // throws
        }
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Processes the currently recorded batch and does not return until it has been executed.
// Must not be called on the batch thread or on a context that has a parent context (both asserted).
//  - Worker-thread mode: submits the batch and waits until no batches are outstanding; returns the result of
//    WaitForBatchThreadIdle().
//  - Inline mode: executes m_CurrentBatch on the calling thread; returns true if there was at least one recorded
//    command or post-batch function. Exceptions from batch commands propagate to the caller.
bool TRANSLATION_API BatchedContext::ProcessBatch()
{
    assert(!IsBatchThread());
    assert(m_CreationArgs.pParentContext == nullptr);

    if (m_CreationArgs.SubmitBatchesToWorkerThread)
    {
        // NOTE(review): SubmitBatch()/FinishBatch() take m_RecordingLock again while it is held here, so the
        // lock is presumably recursive - confirm.
        auto Lock = m_RecordingLock.TakeLock();
        SubmitBatch();
        return WaitForBatchThreadIdle();
    }
    else
    {
        // Ensure destroys are executed even if a batch function errors out.
        auto Lock = m_RecordingLock.TakeLock();
        // On exit: run and drop the post-batch functions, reset the per-batch counters, publish the batch ID
        // that just finished and move on to the next recording ID.
        auto FunctionExit = MakeScopeExit([this]()
        {
            for (auto& fn : m_PostBatchFunctions)
            {
                fn();
            }
            m_PostBatchFunctions.clear();
            m_CurrentCommandCount = 0;
            m_PendingDestructionMemorySize = 0;
            m_CompletedBatchID = m_RecordingBatchID;
            ++m_RecordingBatchID;
            if (static_cast(m_RecordingBatchID) == 0)
            {
                // Rolled into the next "generation", but each generation should start with ID 1
                ++m_RecordingBatchID;
            }
        });

        // Evaluated before processing, because processing clears the batch.
        bool bRet = !m_CurrentBatch.empty() || !m_PostBatchFunctions.empty();
        ProcessBatchWork(m_CurrentBatch); // throws
        return bRet;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Returns a Batch object to fill: the front of m_FreeBatches when one is available, otherwise a newly
// allocated Batch constructed with m_BatchStorageAllocator.
std::unique_ptr BatchedContext::GetIdleBatch()
{
    // Assumed m_SubmissionLock is held.
    // Find a free batch if possible
    if (!m_FreeBatches.empty())
    {
        auto batch = std::move(m_FreeBatches.front());
        m_FreeBatches.pop_front();
        return batch;
    }
    return std::unique_ptr(new Batch(m_BatchStorageAllocator));
}

//----------------------------------------------------------------------------------------------------------------------------------
// Detaches the currently recorded commands and post-batch functions into a Batch that is ready to submit,
// then resets the recording state and advances the recording batch ID.
// Returns nullptr when nothing has been recorded (no commands and no post-batch functions).
// bFlushImmCtxAfterBatch is forwarded to Batch::PrepareToSubmit.
std::unique_ptr BatchedContext::FinishBatch(bool bFlushImmCtxAfterBatch)
{
    assert(!IsBatchThread());
    BatchStorage NewBatch(m_BatchStorageAllocator);
    std::vector> NewPostBatchFunctions;
    std::unique_ptr pRet;

    // Synchronize with threads recording to the batch
    {
        auto Lock = m_RecordingLock.TakeLock();
        if (m_CurrentBatch.empty() && m_PostBatchFunctions.empty())
        {
            return nullptr;
        }

        // After the swaps the members hold the fresh, empty containers and the locals hold the recorded work.
        std::swap(m_CurrentBatch, NewBatch);
        std::swap(m_PostBatchFunctions, NewPostBatchFunctions);

        // Synchronize with the worker thread potentially retiring batches
        {
            auto SubmissionLock = m_SubmissionLock.TakeLock();
            pRet = GetIdleBatch();
        }

        pRet->PrepareToSubmit(std::move(NewBatch), std::move(NewPostBatchFunctions), m_RecordingBatchID, m_CurrentCommandCount, bFlushImmCtxAfterBatch);
        m_CurrentCommandCount = 0;
        m_PendingDestructionMemorySize = 0;
        ++m_RecordingBatchID;
        if (static_cast(m_RecordingBatchID) == 0)
        {
            // Rolled into the next "generation", but each generation should start with ID 1
            ++m_RecordingBatchID;
        }
    }
    return std::move(pRet);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Records execution of an already-built command list batch as a single nested command in the current batch.
// The nested batch executes against the parent context when one exists, otherwise against this context.
// Must not be called on the batch thread (asserted).
void TRANSLATION_API BatchedContext::SubmitCommandListBatch(Batch* pBatch)
{
    assert(!IsBatchThread());
    auto pExecutionContext = m_CreationArgs.pParentContext ? m_CreationArgs.pParentContext : this;
    auto Lock = m_RecordingLock.TakeLock();
    AddToBatch(m_CurrentBatch, CmdExecuteNestedBatch{ pBatch, pExecutionContext });
    m_CurrentCommandCount += pBatch->m_NumCommands;

    // Increase the batch ID to the next "generation"
    // See the comments in SyncWithBatch for more details
    // The upper 32 bits are incremented by one and the lower 32 bits are reset to 1.
    constexpr uint64_t GenerationIDMask = 0xffffffff00000000ull;
    m_RecordingBatchID = (m_RecordingBatchID & GenerationIDMask) + (1ull << 32ull) + 1;

    // Skip the frequency check when the nested batch alone reaches the kickoff threshold.
    SubmitBatchIfIdle(pBatch->m_NumCommands >= c_CommandKickoffMinThreshold);

    // It's guaranteed that a command list both begins and ends with a ClearState command, so we'll
    // clear our own tracked state here.
    ClearStateImpl();
}

//----------------------------------------------------------------------------------------------------------------------------------
// Runs the batch's post-batch functions, then (under m_SubmissionLock) returns its storage to m_FreePages
// and places the Batch object on m_FreeBatches for reuse.
void TRANSLATION_API BatchedContext::RetireBatch(std::unique_ptr pBatch)
{
    // Note: Only used for command list batches - implicit batches have slightly different retiring semantics
    for (auto& fn : pBatch->m_PostBatchFunctions)
    {
        fn();
    }

    auto Lock = m_SubmissionLock.TakeLock();
    pBatch->Retire(m_FreePages);
    m_FreeBatches.emplace_back(std::move(pBatch));
}

//----------------------------------------------------------------------------------------------------------------------------------
// Hands the currently recorded batch off for execution.
//  - Inline mode: forwards to ProcessBatch() and returns its result.
//  - Worker-thread mode: queues the finished batch and signals the batch thread. Returns false when there
//    was nothing to submit, true otherwise. If c_MaxOutstandingBatches are already outstanding, blocks until
//    one of them has been consumed.
// Must not be called on the batch thread or on a context that has a parent context (both asserted).
bool TRANSLATION_API BatchedContext::SubmitBatch(bool bFlushImmCtxAfterBatch)
{
    assert(!IsBatchThread());
    assert(m_CreationArgs.pParentContext == nullptr);

    if (!m_CreationArgs.SubmitBatchesToWorkerThread)
    {
        return ProcessBatch();
    }

    auto pBatch = FinishBatch(bFlushImmCtxAfterBatch);
    if (!pBatch)
    {
        return false;
    }

    {
        auto Lock = m_SubmissionLock.TakeLock();
        m_QueuedBatches.emplace_back(std::move(pBatch));
    }

    {
        auto Lock = m_RecordingLock.TakeLock();

        // Check if there's room in the semaphores.
        assert(m_NumOutstandingBatches <= c_MaxOutstandingBatches);
        if (m_NumOutstandingBatches == c_MaxOutstandingBatches)
        {
            WaitForSingleBatch(INFINITE);
            assert(m_NumOutstandingBatches < c_MaxOutstandingBatches);
        }

        // Wake up the batch thread
        BOOL value = ReleaseSemaphore(m_BatchSubmittedSemaphore, 1, nullptr);
        assert(value == TRUE);
        UNREFERENCED_PARAMETER(value);

        ++m_NumOutstandingBatches;
    }
    return true;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Submits the current batch when worker-thread mode is on, the frequency check passes (or is skipped via
// bSkipFrequencyCheck), and the batch thread has no outstanding work. Expects at least one recorded command
// (asserted).
void TRANSLATION_API BatchedContext::SubmitBatchIfIdle(bool bSkipFrequencyCheck)
{
    assert(!IsBatchThread());
    assert(m_CurrentCommandCount > 0);
    if (m_CreationArgs.SubmitBatchesToWorkerThread && // Don't do work on the app thread.
        (bSkipFrequencyCheck || m_CurrentCommandCount % c_CommandKickoffMinThreshold == 0) && // Avoid checking for idle all the time, it's not free.
        IsBatchThreadIdle())
    {
        SubmitBatch();
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Blocks until no batches are outstanding. Returns true if at least one batch had to be waited for.
bool BatchedContext::WaitForBatchThreadIdle()
{
    assert(!IsBatchThread());
    bool bRet = false;
    while (m_NumOutstandingBatches)
    {
        bRet = true;
        WaitForSingleBatch(INFINITE);
    }
    return bRet;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Non-blocking idle check: consumes every already-signaled completion with a zero timeout, then reports
// whether any batches remain outstanding.
bool BatchedContext::IsBatchThreadIdle()
{
    assert(!IsBatchThread());
    // Empty loop body on purpose: each successful zero-timeout wait retires one completed batch.
    while (m_NumOutstandingBatches > 0 && WaitForSingleBatch(0));
    return m_NumOutstandingBatches == 0;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Waits up to `timeout` milliseconds for the batch thread to signal one consumed batch.
// On success decrements the outstanding count and, if the worker recorded a pending flush notification,
// invokes the PostSubmitCallback on the calling thread. Returns true only when a batch was consumed.
bool BatchedContext::WaitForSingleBatch(DWORD timeout)
{
    assert(!IsBatchThread());
    if (WaitForSingleObject(m_BatchConsumedSemaphore, timeout) == WAIT_OBJECT_0)
    {
        --m_NumOutstandingBatches;
        // exchange(false) both reads and clears the flag, so the callback runs once per recorded request.
        if (m_bFlushPendingCallback.exchange(false))
        {
            m_Callbacks.PostSubmitCallback();
        }
        return true;
    }
    return false;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Executes one batch's commands and reports _com_error and std::bad_alloc failures through
// ThreadErrorCallback instead of letting them propagate. Other exception types are not caught here.
void BatchedContext::ProcessBatchImpl(Batch* pBatchToProcess)
{
    try
    {
        ProcessBatchWork(pBatchToProcess->m_BatchCommands); // throws
    }
    catch (_com_error& hrEx)
    {
        m_Callbacks.ThreadErrorCallback(hrEx.Error());
    }
    catch (std::bad_alloc&)
    {
        m_Callbacks.ThreadErrorCallback(E_OUTOFMEMORY);
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Body of the worker thread: waits for the submit semaphore, processes the batch at the front of
// m_QueuedBatches, retires it, flushes the immediate context with the batch's requested mask, and signals
// the consumed semaphore. Exits when the semaphore is signaled while the queue is empty.
void BatchedContext::BatchThread()
{
    assert(IsBatchThread());

    // NOTE(review): the return value of GetModuleHandleExW is not checked, and hMyModule is uninitialized
    // if the call fails - confirm this cannot happen in practice.
    HMODULE hMyModule;
    // Keep this module loaded until this thread safely returns.
    GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, reinterpret_cast(&c_LastCommand), &hMyModule);
    auto ScopeExit = MakeScopeExit([hMyModule]() { FreeLibrary(hMyModule); });

    while (true)
    {
        // Wait for work
        WaitForSingleObject(m_BatchSubmittedSemaphore, INFINITE);

        // Figure out what we're supposed to be working on
        Batch* pBatchToProcess = nullptr;
        {
            auto Lock = m_SubmissionLock.TakeLock();
            if (!m_QueuedBatches.empty())
            {
                // The batch stays owned by the queue while it is processed; only a raw pointer is taken.
                pBatchToProcess = m_QueuedBatches.front().get();
            }
        }

        // Semaphore was signaled but there's no work to be done, exit thread.
        if (!pBatchToProcess)
        {
            return;
        }

        // Do the work
        ProcessBatchImpl(pBatchToProcess);

        // Retire the batch
        for (auto& fn : pBatchToProcess->m_PostBatchFunctions)
        {
            fn();
        }

        {
            auto Lock = m_SubmissionLock.TakeLock();
            // Read the mask before the batch object is moved to the free list.
            UINT FlushRequestedMask = pBatchToProcess->m_FlushRequestedMask;
            m_CompletedBatchID = pBatchToProcess->m_BatchID;
            pBatchToProcess->Retire(m_FreePages);
            m_FreeBatches.emplace_back(std::move(m_QueuedBatches.front()));
            m_QueuedBatches.pop_front();
            m_ImmCtx.Flush(FlushRequestedMask);
        }

        BOOL value = ReleaseSemaphore(m_BatchConsumedSemaphore, 1, nullptr);
        assert(value == TRUE);
        UNREFERENCED_PARAMETER(value);
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// True only when worker-thread mode is enabled and the calling thread's ID equals the batch thread's ID.
bool BatchedContext::IsBatchThread()
{
    return m_CreationArgs.SubmitBatchesToWorkerThread && GetCurrentThreadId() == GetThreadId(m_BatchThread.m_h);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Notification hook for a completed submit: deferred via a flag on the worker thread, delivered
// immediately through PostSubmitCallback on any other thread.
void TRANSLATION_API BatchedContext::PostSubmit()
{
    if (IsBatchThread())
    {
        // If we're on the worker thread, just indicate that we've flushed.
        // The app thread will consume this information when draining the work queue.
        m_bFlushPendingCallback = true;
    }
    else
    {
        // If we're on the app thread, call into the next layer up.
        m_Callbacks.PostSubmitCallback();
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Pops the head of the free-page list under the container's lock and returns it, or nullptr when the list
// is empty. The link to the next free page is read from the start of the page itself.
void* FreePageContainer::RemovePage() noexcept
{
    auto Lock = m_CS.TakeLock();
    if (m_FreePageHead != nullptr)
    {
        void* pPage = m_FreePageHead;
        m_FreePageHead = *reinterpret_cast(pPage);
        return pPage;
    }
    return nullptr;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Releases every page still on the free list with operator delete.
FreePageContainer::~FreePageContainer()
{
    while (void* pPage = RemovePage())
    {
        operator delete(pPage);
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Returns a recycled page from the attached container when bAllocSuccess is true and a container is set
// (which may itself yield nullptr if the container is empty); returns nullptr in every other case.
void* BatchedContext::BatchStorageAllocator::operator()(bool bAllocSuccess) noexcept
{
    if (bAllocSuccess && m_Container)
    {
        return m_Container->RemovePage();
    }
    return nullptr;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Pushes pPage onto the front of the free list, storing the previous head at the start of the page.
// NOTE(review): no lock is taken here; presumably LockedAdder holds the container's lock for its lifetime - confirm.
void FreePageContainer::LockedAdder::AddPage(void* pPage) noexcept
{
    *reinterpret_cast(pPage) = m_FreePageHead;
    m_FreePageHead = pPage;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Returns the storage of every command segment to FreePages, then empties the segment list and the
// post-batch function list so the Batch object can be reused.
void BatchedContext::Batch::Retire(FreePageContainer& FreePages) noexcept
{
    FreePageContainer::LockedAdder Adder(FreePages);
    for (auto& segment : m_BatchCommands.m_segments)
    {
        Adder.AddPage(segment.begin());
    }
    m_BatchCommands.m_segments.clear();
    m_PostBatchFunctions.clear();
}

//----------------------------------------------------------------------------------------------------------------------------------
// Takes ownership of the recorded commands and post-batch functions and stores the batch's ID, command
// count, and flush request for later execution.
void BatchedContext::Batch::PrepareToSubmit(BatchStorage BatchCommands, std::vector> PostBatchFunctions, uint64_t BatchID, UINT NumCommands, bool bFlushImmCtxAfterBatch)
{
    m_BatchCommands = std::move(BatchCommands);
    m_PostBatchFunctions =
std::move(PostBatchFunctions); m_BatchID = BatchID; m_NumCommands = NumCommands; m_FlushRequestedMask = bFlushImmCtxAfterBatch ? COMMAND_LIST_TYPE_ALL_MASK : 0; } } ================================================ FILE: src/BlitHelper.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #include "pch.h" #include "BlitHelperShaders.h" const UINT MAX_PLANES = 3; namespace D3D12TranslationLayer { static UINT RectHeight(const RECT& r) { return r.bottom - r.top; } static UINT RectWidth(const RECT& r) { return r.right - r.left; } BlitHelper::BlitHelper(ImmediateContext *pContext) : m_pParent(pContext) { } auto BlitHelper::PrepareShaders(Resource *pSrc, UINT srcPlanes, Resource *pDst, UINT dstPlanes, bool bEnableAlpha, bool bSwapRB, int& outSrcPixelScalingFactor) -> BlitPipelineState* { const D3D12_RESOURCE_DESC &srcDesc = pSrc->GetUnderlyingResource()->GetDesc(); const D3D12_RESOURCE_DESC &dstDesc = pDst->GetUnderlyingResource()->GetDesc(); if (CD3D11FormatHelper::YUV(dstDesc.Format)) { // YUV -> RGB conversion only for now, or RGB -> RGB stretching throw _com_error(E_INVALIDARG); } if (srcPlanes > MAX_PLANES) { throw _com_error(E_INVALIDARG); } if (dstPlanes != 1) { // just RGB output for now throw _com_error(E_INVALIDARG); } BlitHelperKeyUnion key; key.m_Bits.SrcFormat = srcDesc.Format; key.m_Bits.DstFormat = dstDesc.Format; key.m_Bits.DstSampleCount = dstDesc.SampleDesc.Count; key.m_Bits.bSwapRB = bSwapRB; key.m_Bits.bEnableAlpha = bEnableAlpha; key.m_Bits.Unused = 0; auto& spPSO = m_spBlitPSOs[key.m_Data]; if (!spPSO) { spPSO.reset(new BlitPipelineState(m_pParent)); struct ConvertPSOStreamDescriptor { CD3DX12_PIPELINE_STATE_STREAM_VS VS{ CD3DX12_SHADER_BYTECODE(g_VSMain, sizeof(g_VSMain)) }; CD3DX12_PIPELINE_STATE_STREAM_PS PS; CD3DX12_PIPELINE_STATE_STREAM_PRIMITIVE_TOPOLOGY PrimitiveTopology{ D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE }; CD3DX12_PIPELINE_STATE_STREAM_NODE_MASK NodeMask; 
CD3DX12_PIPELINE_STATE_STREAM_RENDER_TARGET_FORMATS RTVFormats; CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL DSS; CD3DX12_PIPELINE_STATE_STREAM_BLEND_DESC Blend; CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_DESC Samples; CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_MASK SampleMask{ UINT_MAX }; } psoStream; outSrcPixelScalingFactor = 1; if (srcDesc.Format == DXGI_FORMAT_P010 || srcDesc.Format == DXGI_FORMAT_Y210) { // we need to add additional math in the shader to scale from the 10bit range into the output [0,1]. As // the input to the shader is normalized, we need to multiply by 2^6 to get a float in the [0,1] range. outSrcPixelScalingFactor = 64; } switch (srcPlanes) { case 3: psoStream.PS = CD3DX12_SHADER_BYTECODE(g_PS3PlaneYUV, sizeof(g_PS3PlaneYUV)); break; case 2: psoStream.PS = CD3DX12_SHADER_BYTECODE(g_PS2PlaneYUV, sizeof(g_PS2PlaneYUV)); break; default: switch (srcDesc.Format) { case DXGI_FORMAT_AYUV: psoStream.PS = CD3DX12_SHADER_BYTECODE(g_PSAYUV, sizeof(g_PSAYUV)); break; case DXGI_FORMAT_Y410: case DXGI_FORMAT_Y416: psoStream.PS = CD3DX12_SHADER_BYTECODE(g_PSY4XX, sizeof(g_PSY4XX)); break; case DXGI_FORMAT_YUY2: case DXGI_FORMAT_Y210: case DXGI_FORMAT_Y216: psoStream.PS = CD3DX12_SHADER_BYTECODE(g_PSPackedYUV, sizeof(g_PSPackedYUV)); break; default: if (bSwapRB) { psoStream.PS = CD3DX12_SHADER_BYTECODE(g_PSBasic_SwapRB, sizeof(g_PSBasic_SwapRB)); } else { psoStream.PS = CD3DX12_SHADER_BYTECODE(g_PSBasic, sizeof(g_PSBasic)); } } break; } psoStream.NodeMask = m_pParent->GetNodeMask(); psoStream.RTVFormats = D3D12_RT_FORMAT_ARRAY{ { dstDesc.Format }, 1 }; psoStream.Samples = dstDesc.SampleDesc; CD3DX12_DEPTH_STENCIL_DESC DSS(CD3DX12_DEFAULT{}); DSS.DepthEnable = false; psoStream.DSS = DSS; if (bEnableAlpha) { auto& blendDesc = static_cast(psoStream.Blend).RenderTarget[0]; blendDesc.BlendEnable = TRUE; blendDesc.SrcBlend = D3D12_BLEND_SRC_ALPHA; blendDesc.DestBlend = D3D12_BLEND_INV_SRC_ALPHA; blendDesc.BlendOp = D3D12_BLEND_OP_ADD; blendDesc.SrcBlendAlpha = 
D3D12_BLEND_ONE; blendDesc.DestBlendAlpha = D3D12_BLEND_ONE; blendDesc.BlendOpAlpha = D3D12_BLEND_OP_ADD; } D3D12_PIPELINE_STATE_STREAM_DESC psoStreamDesc = { sizeof(psoStream), &psoStream }; ThrowFailure(m_pParent->m_pDevice12_2->CreatePipelineState(&psoStreamDesc, IID_PPV_ARGS(spPSO->GetForCreate()))); } if (!m_spRootSig) { m_spRootSig.reset(new InternalRootSignature(m_pParent)); // throw( bad_alloc ) m_spRootSig->Create(g_PSBasic, sizeof(g_PSBasic)); // throw( _com_error ) } return spPSO.get(); } void BlitHelper::Blit(Resource* pSrc, UINT SrcSubresourceIdx, const RECT& SrcRect, Resource* pDst, UINT DstSubresourceIdx, const RECT& DstRect, bool bEnableAlpha, bool bSwapRBChannels) { UINT SrcPlaneCount = pSrc->AppDesc()->NonOpaquePlaneCount(); UINT DstPlaneCount = pDst->AppDesc()->NonOpaquePlaneCount(); UINT SrcSubresourceIndices[MAX_PLANES]; UINT DstSubresourceIndices[MAX_PLANES]; auto FillIndices = [](Resource* pResource, UINT SubresourceIdx, UINT PlaneCount, UINT* pIndices) { UINT MipLevel, ArraySlice, PlaneIdx; pResource->DecomposeSubresource(SubresourceIdx, MipLevel, ArraySlice, PlaneIdx); assert(PlaneIdx == 0); for (UINT i = 0; i < PlaneCount; ++i) { pIndices[i] = pResource->GetSubresourceIndex(i, MipLevel, ArraySlice); } }; FillIndices(pSrc, SrcSubresourceIdx, SrcPlaneCount, SrcSubresourceIndices); FillIndices(pDst, DstSubresourceIdx, DstPlaneCount, DstSubresourceIndices); Blit(pSrc, SrcSubresourceIndices, SrcPlaneCount, SrcRect, pDst, DstSubresourceIndices, DstPlaneCount, DstRect, bEnableAlpha, bSwapRBChannels); } void BlitHelper::Blit(Resource *pSrc, UINT *pSrcSubresourceIndices, UINT numSrcSubresources, const RECT& srcRect, Resource *pDst, UINT *pDstSubresourceIndices, UINT numDstSubresources, const RECT& dstRect, bool bEnableAlpha, bool bSwapRBChannels) { const D3D12_RESOURCE_DESC &dstDesc = pDst->GetUnderlyingResource()->GetDesc(); assert( numSrcSubresources <= MAX_PLANES ); UINT nonMsaaSrcSubresourceIndices[MAX_PLANES]; memcpy( 
&nonMsaaSrcSubresourceIndices[0], pSrcSubresourceIndices, numSrcSubresources * sizeof(UINT) );

    ResourceCacheEntry OwnedCacheEntryFromResolve;

    auto srcFormat = pSrc->AppDesc()->Format();
    auto dstFormat = pDst->AppDesc()->Format();

    // A destination created without the render-target flag cannot be drawn to directly.
    bool needsTempRenderTarget = (dstDesc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) == D3D12_RESOURCE_FLAG_NONE;
    // Swapping R/B on a YUV source is done as a second blit after the YUV conversion pass.
    bool needsTwoPassColorConvert = (bSwapRBChannels && CD3D11FormatHelper::YUV( srcFormat ));

    //If the src is MSAA, resolve to non-MSAA
    if (pSrc->AppDesc()->Samples() > 1)
    {
        assert( !needsTwoPassColorConvert ); //Can't have MSAA YUV resources, so this should be false
        // From here on pSrc and nonMsaaSrcSubresourceIndices refer to the resolved cache resource.
        ResolveToNonMsaa( &pSrc /*inout*/, nonMsaaSrcSubresourceIndices /*inout*/, numSrcSubresources );

        // We used a Cache Entry of pSrc's format to do the resolve.
        // If pDst uses the same format and we need a temp render target,
        // we should take ownership of the resource used for resolve so that
        // a new resource can be used for the destination of the blit.
        // This is to prevent the same resource being used for read and write (see comment block below).
        if (srcFormat == dstFormat && needsTempRenderTarget)
        {
            m_pParent->GetResourceCache().TakeCacheEntryOwnership( srcFormat, OwnedCacheEntryFromResolve );
        }
    }

    int srcPixelScalingFactor = 1;
    BlitPipelineState* pPSO = PrepareShaders(pSrc, numSrcSubresources, pDst, numDstSubresources, bEnableAlpha, bSwapRBChannels, srcPixelScalingFactor /*out argument*/);

    m_pParent->PreRender(COMMAND_LIST_TYPE::GRAPHICS);

    //
    // setup the RTV
    //
    auto pNewDestinationResource = pDst;
    RTV* pRTV = nullptr;
    // Holds the view created on the fly when rendering straight into pDst.
    std::optional LocalRTV;
    ResourceCacheEntry OwnedCacheEntry;
    if (needsTempRenderTarget || needsTwoPassColorConvert)
    {
        // Render into a cached intermediate resource of the destination format, sized to dstRect.
        auto& CacheEntry = m_pParent->GetResourceCache().GetResource(dstFormat, RectWidth(dstRect), RectHeight(dstRect));
        pNewDestinationResource = CacheEntry.m_Resource.get();
        pRTV = CacheEntry.m_RTV.get();

        if (needsTempRenderTarget && needsTwoPassColorConvert)
        {
            // If we're doing both of these, then our two-pass approach is going to turn into
            // a three-pass approach (first color convert to a cache entry, then swap RB channels
            // into another cache entry, then copy to the non-renderable destination).
            // In this case, passes one and two will both try to render to the cache entry,
            // with pass two trying to simultaneously read AND write from the cache entry.
            //
            // To prevent this, take ownership of the cache entry during pass one, so that pass two
            // allocates a new resource. The resource from pass one will then be destroyed rather than
            // being cached... but this seems okay since this should be *very* uncommon
            m_pParent->GetResourceCache().TakeCacheEntryOwnership(dstFormat, OwnedCacheEntry);
        }
    }
    else
    {
        // Render directly into pDst through a single-slice array view of the target subresource.
        UINT subresource = pDstSubresourceIndices[0];
        UINT8 DstPlane = 0, DstMip = 0;
        UINT16 DstArraySlice = 0;
        D3D12DecomposeSubresource(subresource, pDst->AppDesc()->MipLevels(), pDst->AppDesc()->ArraySize(), DstMip, DstArraySlice, DstPlane);

        D3D12_RENDER_TARGET_VIEW_DESC RTVDesc = {};
        RTVDesc.Format = dstDesc.Format;
        if (pDst->AppDesc()->Samples() > 1)
        {
            RTVDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY;
            RTVDesc.Texture2DMSArray.ArraySize = 1;
            RTVDesc.Texture2DMSArray.FirstArraySlice = DstArraySlice;
        }
        else
        {
            RTVDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY;
            RTVDesc.Texture2DArray.MipSlice = DstMip;
            RTVDesc.Texture2DArray.PlaneSlice = DstPlane;
            RTVDesc.Texture2DArray.ArraySize = 1;
            RTVDesc.Texture2DArray.FirstArraySlice = DstArraySlice;
        }

        LocalRTV.emplace(m_pParent, RTVDesc, *pDst);
        pRTV = &LocalRTV.value();
    }

    //
    // Transition the src & dst resources
    //
    {
        for (UINT i = 0; i < numSrcSubresources; i++)
        {
            UINT subresourceIndex = nonMsaaSrcSubresourceIndices[i];
            m_pParent->GetResourceStateManager().TransitionSubresource(pSrc, subresourceIndex, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
        }

        assert(numDstSubresources == 1); // for now, just packed output
        for (UINT i = 0; i < numDstSubresources; i++)
        {
            // NOTE(review): when only needsTwoPassColorConvert is set, pNewDestinationResource is the cache
            // resource but the index still comes from pDstSubresourceIndices - confirm that is intended.
            UINT subresourceIndex = needsTempRenderTarget ? 0 : pDstSubresourceIndices[i];
            m_pParent->GetResourceStateManager().TransitionSubresource(pNewDestinationResource, subresourceIndex, D3D12_RESOURCE_STATE_RENDER_TARGET);
        }

        m_pParent->GetResourceStateManager().ApplyAllResourceTransitions();
    }

    // No predication in DX9
    ImmediateContext::CDisablePredication DisablePredication(m_pParent);

    // Always reserve MAX_PLANES view slots; unused ones are filled with null SRVs below.
    UINT SRVBaseSlot = m_pParent->ReserveSlots(m_pParent->m_ViewHeap, MAX_PLANES);

    ID3D12GraphicsCommandList* pCommandList = m_pParent->GetGraphicsCommandList();
    pCommandList->SetGraphicsRootSignature(m_spRootSig->GetRootSignature());
    pCommandList->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

    {
        // Unbind all VBs
        D3D12_VERTEX_BUFFER_VIEW VBVArray[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
        memset(VBVArray, 0, sizeof(VBVArray));
        pCommandList->IASetVertexBuffers(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT, VBVArray);
    }

    //
    // set up the SRVs
    //
    for (UINT i = 0; i < numSrcSubresources; i++)
    {
        UINT subresource = nonMsaaSrcSubresourceIndices[i];
        UINT8 SrcPlane = 0, SrcMip = 0;
        UINT16 SrcArraySlice = 0;
        D3D12DecomposeSubresource(subresource, pSrc->AppDesc()->MipLevels(), pSrc->AppDesc()->ArraySize(), SrcMip, SrcArraySlice, SrcPlane);

        // Choose a UNORM view format for the plane's footprint format; formats not listed are used as-is.
        auto& SrcFootprint = pSrc->GetSubresourcePlacement(subresource).Footprint;
        DXGI_FORMAT ViewFormat = SrcFootprint.Format;
        switch (ViewFormat)
        {
        case DXGI_FORMAT_R8_TYPELESS:
            ViewFormat = DXGI_FORMAT_R8_UNORM;
            break;
        case DXGI_FORMAT_R8G8_TYPELESS:
            ViewFormat = DXGI_FORMAT_R8G8_UNORM;
            break;
        case DXGI_FORMAT_R16_TYPELESS:
            ViewFormat = DXGI_FORMAT_R16_UNORM;
            break;
        case DXGI_FORMAT_R16G16_TYPELESS:
            ViewFormat = DXGI_FORMAT_R16G16_UNORM;
            break;
        case DXGI_FORMAT_AYUV: // 8bpc
        case DXGI_FORMAT_YUY2: // 8bpc
            ViewFormat = DXGI_FORMAT_R8G8B8A8_UNORM;
            break;
        case DXGI_FORMAT_Y410: // 10bpc except 2 bit alpha
            ViewFormat = DXGI_FORMAT_R10G10B10A2_UNORM;
            break;
        case DXGI_FORMAT_Y416: // 16bpc
        case DXGI_FORMAT_Y210: // 10bpc for all 4 channels
        case DXGI_FORMAT_Y216: // 16bpc
            ViewFormat = DXGI_FORMAT_R16G16B16A16_UNORM;
            break;
        }

        D3D12_SHADER_RESOURCE_VIEW_DESC SRVDesc = {};
        SRVDesc.Format = ViewFormat;
        SRVDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
        if (pSrc->AppDesc()->Samples() > 1)
        {
            SRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY;
            SRVDesc.Texture2DMSArray.ArraySize = 1;
            SRVDesc.Texture2DMSArray.FirstArraySlice = SrcArraySlice;
        }
        else
        {
            SRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
            SRVDesc.Texture2DArray.MipLevels = 1;
            SRVDesc.Texture2DArray.MostDetailedMip = SrcMip;
            SRVDesc.Texture2DArray.PlaneSlice = SrcPlane;
            SRVDesc.Texture2DArray.ArraySize = 1;
            SRVDesc.Texture2DArray.FirstArraySlice = SrcArraySlice;
            SRVDesc.Texture2DArray.ResourceMinLODClamp = 0.0f;
        }

        D3D12_CPU_DESCRIPTOR_HANDLE SRVBaseCPU = m_pParent->m_ViewHeap.CPUHandle(SRVBaseSlot + i);
        m_pParent->m_pDevice12->CreateShaderResourceView(pSrc->GetUnderlyingResource(), &SRVDesc, SRVBaseCPU);
    }

    // Fill the remaining reserved slots with the null Texture2DArray SRV.
    for (UINT i = numSrcSubresources; i < MAX_PLANES; ++i)
    {
        D3D12_CPU_DESCRIPTOR_HANDLE SRVBaseCPU = m_pParent->m_ViewHeap.CPUHandle(SRVBaseSlot + i);
        m_pParent->m_pDevice12->CopyDescriptorsSimple(1, SRVBaseCPU, m_pParent->m_NullSRVs[(UINT)RESOURCE_DIMENSION::TEXTURE2DARRAY], D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
    }

    // Root parameter 0: the SRV descriptor table.
    D3D12_GPU_DESCRIPTOR_HANDLE SRVBaseGPU = m_pParent->m_ViewHeap.GPUHandle(SRVBaseSlot);
    pCommandList->SetGraphicsRootDescriptorTable(0, SRVBaseGPU);

    auto Descriptor = pRTV->GetRefreshedDescriptorHandle();
    pCommandList->OMSetRenderTargets(1, &Descriptor, TRUE, nullptr);

    // Constant buffers: srcRect, src dimensions
    {
        // Root parameter 1: six ints - left, right, top, bottom, then the width and height of plane 0's footprint.
        UINT subresourceIndex = nonMsaaSrcSubresourceIndices[0];
        auto& srcSubresourceFootprint = pSrc->GetSubresourcePlacement(subresourceIndex).Footprint;
        int srcPositions[6] = { srcRect.left, srcRect.right, srcRect.top, srcRect.bottom, (int)srcSubresourceFootprint.Width, (int)srcSubresourceFootprint.Height };
        if (srcSubresourceFootprint.Format == DXGI_FORMAT_YUY2 && m_pParent->m_CreationArgs.AdjustYUY2BlitCoords)
        {
            // The reported source width is doubled for YUY2 when the creation args request the adjustment.
            srcPositions[4] *= 2;
        }
        pCommandList->SetGraphicsRoot32BitConstants(1, _countof(srcPositions), &srcPositions[0], 0);
    }

    // Constant buffers: srcPixelScalingFactor (For P010/Y410 UNORM pixel scaling to 10 bit range in [0.0f, 1.0f])
    // NOTE(review): PrepareShaders selects the non-unit factor for P010 and Y210, not Y410 - confirm which
    // formats the comment above should name.
    {
        pCommandList->SetGraphicsRoot32BitConstants(2, 1, &srcPixelScalingFactor, 0);
    }

    pCommandList->SetPipelineState(pPSO->GetForUse(COMMAND_LIST_TYPE::GRAPHICS));

    // NOTE(review): the viewport is positioned at dstRect.left/top even when rendering into the intermediate
    // resource, while the copy-back below reads the intermediate from (0,0) - confirm this is intended for
    // destination rects with a non-zero origin.
    CD3DX12_VIEWPORT Viewport((FLOAT)dstRect.left, (FLOAT)dstRect.top, (FLOAT)RectWidth(dstRect), (FLOAT)RectHeight(dstRect));
    CD3DX12_RECT Scissor(dstRect);
    pCommandList->RSSetViewports(1, &Viewport);
    pCommandList->RSSetScissorRects(1, &Scissor);

    // Four vertices drawn as a triangle strip (topology set above); no vertex buffers are bound.
    pCommandList->DrawInstanced(4, 1, 0, 0);

    if (needsTwoPassColorConvert)
    {
        // Second pass: blit the intermediate result (subresource 0) into the real destination.
        UINT srcSubresourceIndex = 0;
        Blit(pNewDestinationResource, &srcSubresourceIndex, 1, dstRect, pDst, pDstSubresourceIndices, numDstSubresources, dstRect, bEnableAlpha, bSwapRBChannels);
    }
    else if (needsTempRenderTarget)
    {
        // Copy the rendered region from the intermediate resource into the non-renderable destination.
        D3D12_BOX srcBox = { 0, 0, 0, RectWidth(dstRect), RectHeight(dstRect), 1 };
        m_pParent->ResourceCopyRegion( pDst, pDstSubresourceIndices[0], dstRect.left, dstRect.top, 0, pNewDestinationResource, 0, &srcBox);
    }

    m_pParent->PostRender(COMMAND_LIST_TYPE::GRAPHICS, e_GraphicsStateDirty);
}

// Resolves the given subresources of *ppResource into a resource obtained from the resource cache
// (same app format, same width/height as the underlying source).
// On return *ppResource points at the cache resource and pSubresourceIndices holds the matching
// subresource indices within it. Exactly one subresource is expected (asserted).
void BlitHelper::ResolveToNonMsaa( _Inout_ Resource **ppResource, _Inout_ UINT* pSubresourceIndices, UINT numSubresources )
{
    auto pResource = *ppResource;
    assert( numSubresources == 1 ); // assert that it's only 1 because you can't have MSAA YUV resources.

    auto srcDesc = pResource->GetUnderlyingResource()->GetDesc();
    UINT width = static_cast(srcDesc.Width);
    UINT height = static_cast(srcDesc.Height);
    auto& cacheEntry = m_pParent->GetResourceCache().GetResource( pResource->AppDesc()->Format(), width, height );
    auto pCacheResource = cacheEntry.m_Resource.get();

    for (UINT i = 0; i < numSubresources; i++)
    {
        auto cacheSubresourceIndex = pCacheResource->GetSubresourceIndex( i, 0, 0 );
        m_pParent->ResourceResolveSubresource( pCacheResource, cacheSubresourceIndex, pResource, pSubresourceIndices[i], pResource->AppDesc()->Format() );
        // Redirect the caller's index to the resolved copy.
        pSubresourceIndices[i] = cacheSubresourceIndex;
    }

    *ppResource = pCacheResource;
}

}
================================================
FILE: src/CMakeLists.txt
================================================
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
cmake_minimum_required(VERSION 3.13)
include(CheckCXXSourceCompiles)

# Translation units compiled into the static library.
set(SRC
    Allocator.cpp
    BatchedContext.cpp
    BlitHelper.cpp
    ColorConvertHelper.cpp
    CommandListManager.cpp
    DeviceChild.cpp
    Fence.cpp
    FormatDescImpl.cpp
    ImmediateContext.cpp
    Main.cpp
    MaxFrameLatencyHelper.cpp
    PipelineState.cpp
    Query.cpp
    Residency.cpp
    Resource.cpp
    ResourceBinding.cpp
    ResourceCache.cpp
    ResourceState.cpp
    RootSignature.cpp
    Shader.cpp
    SubresourceHelpers.cpp
    SwapChainHelper.cpp
    SwapChainManager.cpp
    Util.cpp
    VideoDecode.cpp
    VideoDecodeStatistics.cpp
    VideoDevice.cpp
    VideoProcess.cpp
    VideoProcessEnum.cpp
    VideoReferenceDataManager.cpp
    View.cpp)

# Headers listed as sources of the library target.
set (INC
    ../include/Allocator.h
    ../include/BatchedContext.hpp
    ../include/BatchedQuery.hpp
    ../include/BatchedResource.hpp
    ../include/BlitHelper.hpp
    ../include/BlitHelperShaders.h
    ../include/BlockAllocators.h
    ../include/CommandListManager.hpp
    ../include/D3D12TranslationLayerDependencyIncludes.h
    ../include/D3D12TranslationLayerIncludes.h
    ../include/DeviceChild.hpp
    ../include/DXGIColorSpaceHelper.h
    ../include/Fence.hpp
    ../include/FormatDesc.hpp
    ../include/ImmediateContext.hpp
    ../include/MaxFrameLatencyHelper.hpp
../include/pch.h ../include/PipelineState.hpp ../include/PrecompiledShaders.h ../include/Query.hpp ../include/Residency.h ../include/Resource.hpp ../include/ResourceBinding.hpp ../include/ResourceCache.hpp ../include/ResourceState.hpp ../include/RootSignature.hpp ../include/Sampler.hpp ../include/segmented_stack.h ../include/Shader.hpp ../include/SubresourceHelpers.hpp ../include/SwapChainHelper.hpp ../include/SwapChainManager.hpp ../include/ThreadPool.hpp ../include/Util.hpp ../include/VideoDecode.hpp ../include/VideoDecodeStatistics.hpp ../include/VideoDevice.hpp ../include/VideoProcess.hpp ../include/VideoProcessEnum.hpp ../include/VideoProcessShaders.h ../include/VideoReferenceDataManager.hpp ../include/VideoViewHelper.hpp ../include/View.hpp) file(GLOB INL ../include/*.inl *.inl) file(GLOB EXTERNAL_INC ../external/*.h ../external/*.hpp) add_library(d3d12translationlayer STATIC ${SRC} ${INC} ${INL} ${EXTERNAL_INC}) target_include_directories(d3d12translationlayer PUBLIC ../external PUBLIC ../include PRIVATE ./) if (CMAKE_VERSION VERSION_GREATER 3.16) target_precompile_headers(d3d12translationlayer PRIVATE ../include/pch.h) endif() source_group(Inlines FILES ${INL}) source_group("Header Files\\External" FILES ${EXTERNAL_INC}) target_link_libraries(d3d12translationlayer Microsoft::DirectX-Headers d3d12 dxgi atls) # Using a compile test instead of find_library so this doesn't have to be run from a VS command prompt check_cxx_source_compiles(" #pragma comment(lib, \"dxcore.lib\") #include int main() { IDXCoreAdapterFactory *fac; return DXCoreCreateAdapterFactory(&fac); }" HAVE_DXCORE_LIBRARY) if (HAVE_DXCORE_LIBRARY) target_link_libraries(d3d12translationlayer dxcore) target_link_options(d3d12translationlayer INTERFACE "/DELAYLOAD:dxcore.dll") else() target_compile_definitions(d3d12translationlayer PRIVATE DYNAMIC_LOAD_DXCORE=1) endif() configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../packages.config ${CMAKE_CURRENT_BINARY_DIR}/packages.config COPYONLY) 
add_library(WinPixEventRuntime INTERFACE IMPORTED GLOBAL) set_target_properties(WinPixEventRuntime PROPERTIES INTERFACE_LINK_LIBRARIES "${CMAKE_BINARY_DIR}/packages/WinPixEventRuntime.1.0.190604001/build/WinPixEventRuntime.targets") target_link_libraries(d3d12translationlayer WinPixEventRuntime) target_compile_definitions(d3d12translationlayer PRIVATE $<$:DBG>) target_compile_definitions(d3d12translationlayer PUBLIC $<$:TRANSLATION_LAYER_DBG=1>) if (USE_PIX) target_compile_definitions(d3d12translationlayer PUBLIC USE_PIX) endif() if(MSVC) if("${CMAKE_CXX_COMPILER_ID}" MATCHES "MSVC") target_compile_options(d3d12translationlayer PUBLIC /W4 /WX /wd4238 /wd4324) else() target_compile_options(d3d12translationlayer PUBLIC /W4 /wd4238 /wd4324 -Werror -Wno-missing-braces -Wno-shift-overflow -Wno-delete-non-abstract-non-virtual-dtor -Wno-unused-lambda-capture -Wno-implicit-exception-spec-mismatch -Wno-unused-const-variable -Wno-unused-variable -Wno-unused-parameter -Wno-enum-compare -Wno-reorder-ctor -Wno-sign-compare -Wno-pessimizing-move -Wno-c++11-narrowing -Wno-address-of-temporary -Wno-switch -Wno-missing-field-initializers) endif() target_link_options(d3d12translationlayer INTERFACE "/ignore:4286") else() target_compile_options(d3d12translationlayer PUBLIC -Wall -Wextra -pedantic -Werror) endif() if (NOT IMPLEMENT_RECTS) target_compile_definitions(d3d12translationlayer PRIVATE NO_IMPLEMENT_RECT_FNS) endif() include(CheckIncludeFileCXX) CHECK_INCLUDE_FILE_CXX(d3d12TokenizedProgramFormat.hpp HAS_WDK) if (HAS_WDK) message(NOTICE "Adding WDK-dependent project.") target_compile_definitions(d3d12translationlayer PUBLIC SUPPORTS_DXBC_PARSE) add_library(d3d12translationlayer_wdk STATIC DxbcBuilder.cpp ShaderBinary.cpp ShaderParser.cpp SharedResourceHelpers.cpp ../include/DxbcBuilder.hpp ../include/ShaderBinary.h ../include/SharedResourceHelpers.hpp) target_link_libraries(d3d12translationlayer_wdk d3d12translationlayer) else() message(WARNING "Only adding SDK-dependent 
projects.") endif() ================================================ FILE: src/ColorConvertHelper.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #include "pch.h" #include #include namespace D3D12TranslationLayer { typedef enum { PRIMARY_709, PRIMARY_2020, PRIMARY_601, PRIMARY_UNDEFINED } PrimaryType; typedef float CCMatrix[3][3]; typedef enum { NOMINAL_RANGE_TYPE_FULL, NOMINAL_RANGE_TYPE_STUDIO, } NominalRangeType; typedef struct { float YRange[2]; float CRange[2]; } CCNominalRange; // for RGB to YCbCr (same primaries), assumes normalized input static const CCMatrix RGBToYCbCr[] = { { // PRIMARY_709, { 0.212600f, 0.715200f, 0.072200f }, { -0.114572f, -0.385428f, 0.500000f }, { 0.500000f, -0.454153f, -0.045847f }, }, { // PRIMARY_2020, { 0.262700f, 0.678000f, 0.059300f }, { -0.139630f, -0.360370f, 0.500000f }, { 0.500000f, -0.459786f, -0.040214f }, }, { // PRIMARY_601, { 0.299000f, 0.587000f, 0.114000f }, { -0.168736f, -0.331264f, 0.500000f }, { 0.500000f, -0.418688f, -0.081312f }, }, }; // for YCbCr to RGB (same primaries), assumes normalized input static const CCMatrix YCbCrToRGB[] = { // PRIMARY_709 { { 1.000000f, 0.000000f, 1.574800f }, { 1.000000f, -0.187324f, -0.468124f }, { 1.000000f, 1.855600f, 0.000000f }, }, // PRIMARY_2020 { { 1.000000f, -0.000000f, 1.474600f }, { 1.000000f, -0.164553f, -0.571353f }, { 1.000000f, 1.881400f, 0.000000f }, }, // PRIMARY_601 { { 1.000000f, 0.000000f, 1.402000f }, { 1.000000f, -0.344136f, -0.714136f }, { 1.000000f, 1.772000f, -0.000000f }, }, }; static const CCNominalRange CCNominalRanges[] = { { // NOMINAL_RANGE_TYPE_FULL, { 0.0f, 1.0f }, { 0.0f, 1.0f }, }, { // NOMINAL_RANGE_TYPE_STUDIO, { 0.063f, 0.92f }, { 0.063f, 0.94f }, }, }; static PrimaryType GetPrimaryType(DXGI_COLOR_SPACE_TYPE colorSpace) { if (CDXGIColorSpaceHelper::Is709ColorSpace(colorSpace)) { return PRIMARY_709; } else if (CDXGIColorSpaceHelper::Is2020ColorSpace(colorSpace)) { 
return PRIMARY_2020; } else if (CDXGIColorSpaceHelper::Is601ColorSpace(colorSpace)) { return PRIMARY_601; } else { ThrowFailure(E_UNEXPECTED); // need to handle new color spaces } return PRIMARY_UNDEFINED; } static NominalRangeType GetNominalRangeType(DXGI_COLOR_SPACE_TYPE colorSpace) { if (CDXGIColorSpaceHelper::IsStudioColorSpace(colorSpace)) { return NOMINAL_RANGE_TYPE_STUDIO; } else { return NOMINAL_RANGE_TYPE_FULL; } } static float clip(float low, float high, float val) { return (val > high ? high : (val < low ? low : val)); } static void mulmatrix(_In_ const CCMatrix& matrix, _In_reads_(4) const FLOAT normInput[4], _Out_writes_(4) FLOAT normOutput[4]) { normOutput[0] = matrix[0][0] * normInput[0] + matrix[0][1] * normInput[1] + matrix[0][2] * normInput[2]; normOutput[1] = matrix[1][0] * normInput[0] + matrix[1][1] * normInput[1] + matrix[1][2] * normInput[2]; normOutput[2] = matrix[2][0] * normInput[0] + matrix[2][1] * normInput[1] + matrix[2][2] * normInput[2]; } // // Implements a narrow subset of color conversions: full input nominal ranges and just does conversion from RGB to YUV and from YUV to RGB. // _Use_decl_annotations_ void ColorConvertNormalized(const FLOAT normInput[4], DXGI_COLOR_SPACE_TYPE inputColorSpace, FLOAT normOutput[4], DXGI_COLOR_SPACE_TYPE outputColorSpace) { PrimaryType outputPrimary = GetPrimaryType(outputColorSpace); NominalRangeType outputNominalRange = GetNominalRangeType(outputColorSpace); bool inputRGB = CDXGIColorSpaceHelper::IsRGBColorSpace(inputColorSpace); bool outputRGB = CDXGIColorSpaceHelper::IsRGBColorSpace(outputColorSpace); if (inputRGB == outputRGB) { // this just converts from YUV to RGB and from RGB to YUV... 
ThrowFailure(E_INVALIDARG); } FLOAT input[4]; input[0] = normInput[0]; input[1] = normInput[1]; input[2] = normInput[2]; input[3] = normInput[3]; // convert input from studio to full if needed if (CDXGIColorSpaceHelper::IsStudioColorSpace(inputColorSpace)) { const CCNominalRange &nominalRange = CCNominalRanges[NOMINAL_RANGE_TYPE_STUDIO]; const float YRange = nominalRange.YRange[1] - nominalRange.YRange[0]; const float CRange = inputRGB ? YRange : nominalRange.CRange[1] - nominalRange.CRange[0]; // no difference from C/Y range if RGB const float CRange0 = inputRGB ? nominalRange.YRange[0] : nominalRange.CRange[0]; // expand to full range and clip input[0] = (input[0] - nominalRange.YRange[0] ) / YRange; input[1] = (input[1] - CRange0) / CRange; input[2] = (input[2] - CRange0) / CRange; input[0] = clip(0.0f, 1.0f, input[0]); input[1] = clip(0.0f, 1.0f, input[1]); input[2] = clip(0.0f, 1.0f, input[2]); } // converts from full range into full range if (inputRGB) { mulmatrix(RGBToYCbCr[outputPrimary], input, normOutput); normOutput[1] += 0.5f; normOutput[2] += 0.5f; } else { input[1] = input[1] - 0.5f; input[2] = input[2] - 0.5f; mulmatrix(YCbCrToRGB[outputPrimary], input, normOutput); } normOutput[3] = input[3]; // clip to 0->1 range normOutput[0] = clip(0.0f, 1.0f, normOutput[0]); normOutput[1] = clip(0.0f, 1.0f, normOutput[1]); normOutput[2] = clip(0.0f, 1.0f, normOutput[2]); // if the output is not full, we need to compress to studio range if (outputNominalRange != NOMINAL_RANGE_TYPE_FULL) { const CCNominalRange &nominalRange = CCNominalRanges[outputNominalRange]; const float YRange = nominalRange.YRange[1] - nominalRange.YRange[0]; const float CRange = outputRGB ? YRange : nominalRange.CRange[1] - nominalRange.CRange[0]; // no difference from C/Y range if RGB const float CRange0 = outputRGB ? nominalRange.YRange[0] : nominalRange.CRange[0]; // no difference from C/Y range if RGB const float CRange1 = outputRGB ? 
nominalRange.YRange[1] : nominalRange.CRange[1]; // no difference from C/Y range if RGB normOutput[0] = normOutput[0] * YRange + nominalRange.YRange[0]; normOutput[1] = normOutput[1] * CRange + CRange0; normOutput[2] = normOutput[2] * CRange + CRange0; normOutput[0] = clip(nominalRange.YRange[0], nominalRange.YRange[1], normOutput[0]); normOutput[1] = clip(CRange0, CRange1, normOutput[1]); normOutput[2] = clip(CRange0, CRange1, normOutput[2]); } } }; ================================================ FILE: src/CommandListManager.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #include "pch.h" namespace D3D12TranslationLayer { // must match COMMAND_LIST_TYPE enum D3D12_COMMAND_LIST_TYPE D3D12TypeMap[] = { D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, D3D12_COMMAND_LIST_TYPE_VIDEO_PROCESS }; static_assert(_countof(D3D12TypeMap) == (size_t)COMMAND_LIST_TYPE::MAX_VALID, "D3D12TypeMap must match COMMAND_LIST_TYPE enum."); //================================================================================================================================== // //================================================================================================================================== //---------------------------------------------------------------------------------------------------------------------------------- CommandListManager::CommandListManager(ImmediateContext *pParent, ID3D12CommandQueue *pQueue, COMMAND_LIST_TYPE type) : m_pParent(pParent) , m_type(type) , m_pCommandQueue(pQueue) , m_bNeedSubmitFence(false) , m_pCommandList(nullptr) , m_pCommandAllocator(nullptr) , m_AllocatorPool(false /*bLock*/, GetMaxInFlightDepth(type)) , m_hWaitEvent(CreateEvent(nullptr, FALSE, FALSE, nullptr)) // throw( _com_error ) , m_MaxAllocatedUploadHeapSpacePerCommandList(cMaxAllocatedUploadHeapSpacePerCommandList) { ResetCommandListTrackingData(); 
m_MaxAllocatedUploadHeapSpacePerCommandList = min(m_MaxAllocatedUploadHeapSpacePerCommandList, m_pParent->m_CreationArgs.MaxAllocatedUploadHeapSpacePerCommandList); if (!m_pCommandQueue) { D3D12_COMMAND_QUEUE_DESC queue = {}; queue.Type = GetD3D12CommandListType(m_type); queue.NodeMask = m_pParent->GetNodeMask(); queue.Flags = m_pParent->m_CreationArgs.DisableGPUTimeout ? D3D12_COMMAND_QUEUE_FLAG_DISABLE_GPU_TIMEOUT : D3D12_COMMAND_QUEUE_FLAG_NONE; CComPtr spDevice9; if (SUCCEEDED(m_pParent->m_pDevice12->QueryInterface(&spDevice9))) { ThrowFailure(spDevice9->CreateCommandQueue1(&queue, m_pParent->m_CreationArgs.CreatorID, IID_PPV_ARGS(&m_pCommandQueue))); } else { ThrowFailure(m_pParent->m_pDevice12->CreateCommandQueue(&queue, IID_PPV_ARGS(&m_pCommandQueue))); } } PrepareNewCommandList(); m_pCommandQueue->QueryInterface(&m_pSharingContract); // Ignore failure, interface not always present. } //---------------------------------------------------------------------------------------------------------------------------------- CommandListManager::~CommandListManager() { } void CommandListManager::ReadbackInitiated() noexcept { m_NumFlushesWithNoReadback = 0; } void CommandListManager::AdditionalCommandsAdded() noexcept { m_NumCommands++; } void CommandListManager::DrawCommandAdded() noexcept { m_NumDraws++; } void CommandListManager::DispatchCommandAdded() noexcept { m_NumDispatches++; } void CommandListManager::UploadHeapSpaceAllocated(UINT64 heapSize) noexcept { m_UploadHeapSpaceAllocated += heapSize; } void CommandListManager::SubmitCommandListIfNeeded() { // TODO: Heuristics below haven't been heavily profiled, we'll likely want to re-visit and tune // this based on multiple factors when profiling (i.e. number of draws, amount of memory // referenced, etc.), possibly changing on an app-by-app basis // These parameters attempt to avoid regressing already CPU bound applications. 
// In these cases, submitting too frequently will make the app slower due // to frequently re-emitting state and the overhead of submitting/creating command lists static const UINT cMinDrawsOrDispatchesForSubmit = 512; static const UINT cMinRenderOpsForSubmit = 1000; // To further avoid regressing CPU bound applications, we'll stop opportunistic // flushing if it appears that the app doesn't need to kick off work early. static const UINT cMinFlushesWithNoCPUReadback = 50; const bool bHaveEnoughCommandsForSubmit = m_NumCommands > cMinRenderOpsForSubmit || m_NumDraws + m_NumDispatches > cMinDrawsOrDispatchesForSubmit; const bool bShouldOpportunisticFlush = m_NumFlushesWithNoReadback < cMinFlushesWithNoCPUReadback; const bool bShouldFreeUpMemory = m_UploadHeapSpaceAllocated > m_MaxAllocatedUploadHeapSpacePerCommandList; if ((bHaveEnoughCommandsForSubmit && bShouldOpportunisticFlush) || bShouldFreeUpMemory) { // If the GPU is idle, submit work to keep it busy if (m_Fence.GetCompletedValue() == m_commandListID - 1) { if (g_hTracelogging) { TraceLoggingWrite(g_hTracelogging, "OpportunisticFlush", TraceLoggingUInt32(m_NumCommands, "NumCommands"), TraceLoggingUInt32(m_NumDraws, "NumDraws"), TraceLoggingUInt32(m_NumDispatches, "NumDispatches"), TraceLoggingUInt64(m_UploadHeapSpaceAllocated, "UploadHeapSpaceAllocated")); } SubmitCommandListImpl(); } } } //---------------------------------------------------------------------------------------------------------------------------------- void CommandListManager::PrepareNewCommandList() { HRESULT hr = S_OK; // Acquire a command allocator from the pool (or create a new one) auto pfnCreateNew = [](ID3D12Device* pDevice12, D3D12_COMMAND_LIST_TYPE type) -> unique_comptr // noexcept(false) { unique_comptr spAllocator; HRESULT hr = pDevice12->CreateCommandAllocator( type, IID_PPV_ARGS(&spAllocator) ); ThrowFailure(hr); // throw( _com_error ) return std::move(spAllocator); }; auto pfnWaitForFence = [&](UINT64 fenceVal) -> bool // 
noexcept(false) { return WaitForFenceValue(fenceVal); }; UINT64 CurrentFence = m_Fence.GetCompletedValue(); m_pCommandAllocator = m_AllocatorPool.RetrieveFromPool( CurrentFence, pfnWaitForFence, pfnCreateNew, m_pParent->m_pDevice12.get(), GetD3D12CommandListType(m_type) ); // Create or recycle a command list if (m_pCommandList) { // Recycle the previously created command list ResetCommandList(); } else { // Create a new command list hr = m_pParent->m_pDevice12->CreateCommandList(m_pParent->GetNodeMask(), GetD3D12CommandListType(m_type), m_pCommandAllocator.get(), nullptr, IID_PPV_ARGS(&m_pCommandList)); } ThrowFailure(hr); // throw( _com_error ) ResetResidencySet(); ResetCommandListTrackingData(); #if DBG if (m_pParent->DebugFlags() & Debug_StallExecution) { m_pCommandQueue->Wait(m_StallFence.Get(), m_commandListID); } #endif } //---------------------------------------------------------------------------------------------------------------------------------- void CommandListManager::ResetResidencySet() { m_pResidencySet = std::make_unique(); m_pResidencySet->Open((UINT)m_type); } HRESULT CommandListManager::PreExecuteCommandQueueCommand() { m_bNeedSubmitFence = true; m_pResidencySet->Close(); return m_pParent->GetResidencyManager().PreExecuteCommandQueueCommand(m_pCommandQueue.get(), (UINT)m_type, m_pResidencySet.get()); } HRESULT CommandListManager::PostExecuteCommandQueueCommand() { ResetResidencySet(); return S_OK; } //---------------------------------------------------------------------------------------------------------------------------------- void CommandListManager::SubmitCommandList() { ++m_NumFlushesWithNoReadback; SubmitCommandListImpl(); } //---------------------------------------------------------------------------------------------------------------------------------- void CommandListManager::SubmitCommandListImpl() // throws { // Walk through the list of active queries // and notify them that the command list is being submitted for (LIST_ENTRY 
*pListEntry = m_pParent->m_ActiveQueryList.Flink; pListEntry != &m_pParent->m_ActiveQueryList; pListEntry = pListEntry->Flink) { Async* pAsync = CONTAINING_RECORD(pListEntry, Async, m_ActiveQueryListEntry); pAsync->Suspend(); } CloseCommandList(m_pCommandList.get()); // throws m_pResidencySet->Close(); m_pParent->GetResidencyManager().ExecuteCommandList(m_pCommandQueue.get(), (UINT)m_type, m_pCommandList.get(), m_pResidencySet.get()); // Return the command allocator to the pool for recycling m_AllocatorPool.ReturnToPool(std::move(m_pCommandAllocator), m_commandListID); SubmitFence(); PrepareNewCommandList(); // Walk through the list of active queries // and notify them that a new command list has been prepared for (LIST_ENTRY *pListEntry = m_pParent->m_ActiveQueryList.Flink; pListEntry != &m_pParent->m_ActiveQueryList; pListEntry = pListEntry->Flink) { Async* pAsync = CONTAINING_RECORD(pListEntry, Async, m_ActiveQueryListEntry); pAsync->Resume(); } #if DBG if (m_pParent->DebugFlags() & Debug_WaitOnFlush) { WaitForFenceValue(m_commandListID - 1); // throws } #endif if (m_type == COMMAND_LIST_TYPE::GRAPHICS) { m_pParent->m_DirtyStates |= e_DirtyOnNewCommandList; m_pParent->m_StatesToReassert |= e_ReassertOnNewCommandList; } m_pParent->PostSubmitNotification(); } //---------------------------------------------------------------------------------------------------------------------------------- void CommandListManager::ResetCommandList() { // Reset the command allocator (indicating that the driver can recycle memory associated with it) ThrowFailure(m_pCommandAllocator->Reset()); static_assert(static_cast(COMMAND_LIST_TYPE::MAX_VALID) == 3u, "CommandListManager::ResetCommandList must support all command list types."); switch (m_type) { case COMMAND_LIST_TYPE::GRAPHICS: { ID3D12GraphicsCommandList *pGraphicsCommandList = GetGraphicsCommandList(m_pCommandList.get()); ThrowFailure(pGraphicsCommandList->Reset(m_pCommandAllocator.get(), nullptr)); break; } case 
COMMAND_LIST_TYPE::VIDEO_DECODE: { ThrowFailure(GetVideoDecodeCommandList(m_pCommandList.get())->Reset(m_pCommandAllocator.get())); break; } case COMMAND_LIST_TYPE::VIDEO_PROCESS: { ThrowFailure(GetVideoProcessCommandList(m_pCommandList.get())->Reset(m_pCommandAllocator.get())); break; } default: { ThrowFailure(E_UNEXPECTED); break; } } InitCommandList(); } //---------------------------------------------------------------------------------------------------------------------------------- void CommandListManager::InitCommandList() { static_assert(static_cast(COMMAND_LIST_TYPE::MAX_VALID) == 3u, "CommandListManager::InitCommandList must support all command list types."); switch (m_type) { case COMMAND_LIST_TYPE::GRAPHICS: { ID3D12GraphicsCommandList *pGraphicsCommandList = GetGraphicsCommandList(m_pCommandList.get()); ID3D12DescriptorHeap* pHeaps[2] = { m_pParent->m_ViewHeap.m_pDescriptorHeap.get(), m_pParent->m_SamplerHeap.m_pDescriptorHeap.get() }; // Sampler heap is null for compute-only devices; don't include it in the count. pGraphicsCommandList->SetDescriptorHeaps(m_pParent->ComputeOnly() ? 
1 : 2, pHeaps); m_pParent->SetScissorRectsHelper(); break; } case COMMAND_LIST_TYPE::VIDEO_DECODE: case COMMAND_LIST_TYPE::VIDEO_PROCESS: { // No initialization needed break; } default: { ThrowFailure(E_UNEXPECTED); break; } } } //---------------------------------------------------------------------------------------------------------------------------------- void CommandListManager::CloseCommandList(ID3D12CommandList *pCommandList) { static_assert(static_cast(COMMAND_LIST_TYPE::MAX_VALID) == 3u, "CommandListManager::CloseCommandList must support all command list types."); switch (m_type) { case COMMAND_LIST_TYPE::GRAPHICS: { ThrowFailure(GetGraphicsCommandList(pCommandList)->Close()); break; } case COMMAND_LIST_TYPE::VIDEO_DECODE: { ThrowFailure(GetVideoDecodeCommandList(pCommandList)->Close()); break; } case COMMAND_LIST_TYPE::VIDEO_PROCESS: { ThrowFailure(GetVideoProcessCommandList(pCommandList)->Close()); break; } default: { ThrowFailure(E_UNEXPECTED); break; } } } //---------------------------------------------------------------------------------------------------------------------------------- void CommandListManager::AddResourceToResidencySet(Resource *pResource) { ManagedObject *pResidencyObject = pResource->GetResidencyHandle(); if (pResidencyObject) { assert(pResidencyObject->IsInitialized()); m_pResidencySet->Insert(pResidencyObject); } } //---------------------------------------------------------------------------------------------------------------------------------- void CommandListManager::SubmitFence() noexcept { m_pCommandQueue->Signal(m_Fence.Get(), m_commandListID); IncrementFence(); m_bNeedSubmitFence = false; } //---------------------------------------------------------------------------------------------------------------------------------- void CommandListManager::IncrementFence() { InterlockedIncrement64((volatile LONGLONG*)&m_commandListID); UpdateLastUsedCommandListIDs(); } 
//---------------------------------------------------------------------------------------------------------------------------------- void CommandListManager::UpdateLastUsedCommandListIDs() { // This is required for edge cases where you have a command list that has somethings bound but with no meaningful calls (draw/dispatch), // and Flush is called, incrementing the command list ID. If that command list only does resource barriers, some bound objects may not // have their CommandListID's updated } //---------------------------------------------------------------------------------------------------------------------------------- UINT64 CommandListManager::EnsureFlushedAndFenced() { m_NumFlushesWithNoReadback = 0; PrepForCommandQueueSync(); // throws UINT64 FenceValue = GetCommandListID() - 1; return FenceValue; } //---------------------------------------------------------------------------------------------------------------------------------- void CommandListManager::PrepForCommandQueueSync() { if (HasCommands()) { SubmitCommandListImpl(); // throws } else if (m_bNeedSubmitFence) { SubmitFence(); } } //---------------------------------------------------------------------------------------------------------------------------------- HRESULT CommandListManager::EnqueueSetEvent(HANDLE hEvent) noexcept { UINT64 FenceValue = 0; try { FenceValue = EnsureFlushedAndFenced(); // throws } catch (_com_error& e) { return e.Error(); } catch (std::bad_alloc&) { return E_OUTOFMEMORY; } #if DBG if (m_pParent->DebugFlags() & Debug_StallExecution) { m_StallFence.Signal(FenceValue); } #endif if (m_Fence.GetCompletedValue() >= FenceValue) { return SetEvent(hEvent) ? 
S_OK : E_FAIL; } else { return m_Fence.SetEventOnCompletion(FenceValue, hEvent); } } //---------------------------------------------------------------------------------------------------------------------------------- bool CommandListManager::WaitForCompletion() { ThrowFailure(EnqueueSetEvent(m_hWaitEvent)); // throws #ifdef USE_PIX PIXNotifyWakeFromFenceSignal(m_hWaitEvent); #endif DWORD waitRet = WaitForSingleObject(m_hWaitEvent, INFINITE); UNREFERENCED_PARAMETER(waitRet); assert(waitRet == WAIT_OBJECT_0); return true; } //---------------------------------------------------------------------------------------------------------------------------------- bool CommandListManager::WaitForFenceValue(UINT64 FenceValue) { m_NumFlushesWithNoReadback = 0; return WaitForFenceValueInternal(true, FenceValue); // throws } //---------------------------------------------------------------------------------------------------------------------------------- bool CommandListManager::WaitForFenceValueInternal(bool IsImmediateContextThread, UINT64 FenceValue) { // Command list ID is the value of the fence that will be signaled on submission UINT64 CurCmdListID = IsImmediateContextThread ? 
m_commandListID : GetCommandListIDInterlockedRead(); if (CurCmdListID <= FenceValue) // Using <= because value read by this thread might be stale { if (IsImmediateContextThread) { assert(CurCmdListID == FenceValue); if (HasCommands()) { SubmitCommandListImpl(); // throws } else { // We submitted only an initial data command list, but no fence // Just insert the fence now, and increment its value assert(m_bNeedSubmitFence); SubmitFence(); } CurCmdListID = m_commandListID; assert(CurCmdListID > FenceValue); } else { return false; } } if (m_Fence.GetCompletedValue() >= FenceValue) { return true; } ThrowFailure(m_Fence.SetEventOnCompletion(FenceValue, m_hWaitEvent)); #if DBG if (m_pParent->DebugFlags() & Debug_StallExecution) { m_StallFence.Signal(FenceValue); } #endif #ifdef USE_PIX PIXNotifyWakeFromFenceSignal(m_hWaitEvent); #endif DWORD waitRet = WaitForSingleObject(m_hWaitEvent, INFINITE); UNREFERENCED_PARAMETER(waitRet); assert(waitRet == WAIT_OBJECT_0); return true; } //---------------------------------------------------------------------------------------------------------------------------------- void CommandListManager::DiscardCommandList() { ResetCommandListTrackingData(); m_pCommandList = nullptr; m_pResidencySet->Close(); } D3D12_COMMAND_LIST_TYPE CommandListManager::GetD3D12CommandListType(COMMAND_LIST_TYPE type) { if (ComputeOnly()) { return D3D12_COMMAND_LIST_TYPE_COMPUTE; } else { return D3D12TypeMap[(UINT)type]; } } } ================================================ FILE: src/DeviceChild.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#include "pch.h"

namespace D3D12TranslationLayer
{
    // Hands pObject to the parent's deferred-deletion queue, tagged with this child's last-used
    // command list ID and its wait-for-completion flag.
    void DeviceChild::AddToDeferredDeletionQueue(ID3D12Object* pObject)
    {
        m_pParent->AddObjectToDeferredDeletionQueue(
            pObject,
            m_LastUsedCommandListID,
            m_bWaitForCompletionRequired);
    }

    // Forwards to the parent to obtain the current command list ID for the given command list type.
    UINT64 DeviceChild::GetCommandListID(COMMAND_LIST_TYPE CommandListType) noexcept
    {
        return m_pParent->GetCommandListID(CommandListType);
    }

    // Forwards to the parent's ProcessBatch().
    void BatchedDeviceChild::ProcessBatch()
    {
        m_Parent.ProcessBatch();
    }
};

================================================
FILE: src/DxbcBuilder.cpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include "pch.h"
#include "BlobContainer.h"
#include "DxbcBuilder.hpp"

//=================================================================================================================================
// CDXBCBuilder

//---------------------------------------------------------------------------------------------------------------------------------
// CDXBCBuilder::Init()
//
// Puts the builder into the "empty container" state: no blobs, and a running size that
// already accounts for the container header.
void CDXBCBuilder::Init()
{
    m_pFirstBlob = NULL;
    m_pLastBlob = NULL;
    m_BlobCount = 0;
    m_TotalOutputContainerSize = sizeof(DXBCHeader);
}

//---------------------------------------------------------------------------------------------------------------------------------
// CDXBCBuilder::Cleanup()
//
// Frees every node in the blob list. Blob payloads are freed as well, but only when the builder
// made its own copies of them. Leaves the list empty with zero size and zero count.
void CDXBCBuilder::Cleanup()
{
    for (BlobNode *pNode = m_pFirstBlob; pNode; )
    {
        // Read the link before the node is freed.
        BlobNode *pFollowing = pNode->pNext;
        if (m_bMakeInternalCopiesOfBlobs)
        {
            free((void *)pNode->pBlobData);
        }
        free(pNode);
        pNode = pFollowing;
    }

    m_pFirstBlob = NULL;
    m_pLastBlob = NULL;
    m_TotalOutputContainerSize = 0;
    m_BlobCount = 0;
}

//---------------------------------------------------------------------------------------------------------------------------------
// CDXBCBuilder::StartNewContainer
//
// Discards whatever has been appended so far and starts over with an empty container.
void CDXBCBuilder::StartNewContainer()
{
    Cleanup();
    Init();
}
//--------------------------------------------------------------------------------------------------------------------------------- // CDXBCBuilder::AppendBlob HRESULT CDXBCBuilder::AppendBlob(DXBCFourCC BlobFourCC, UINT BlobSize, const void *pBlobData) { if ((BlobSize > 0 && !pBlobData)) { return E_FAIL; } BlobNode *pNewBlobNode = (BlobNode *)malloc(sizeof(BlobNode)); if (!pNewBlobNode) { return E_OUTOFMEMORY; } // Initialize node // Check what the new total output container size will be. UINT NewTotalSize = m_TotalOutputContainerSize + BlobSize + 4 /*container index entry*/ + sizeof(DXBCBlobHeader) /* blob header */; // Checked builds include extra debug info and so can be much larger, // so don't enforce the retail blob size limit. #if DBG if ((NewTotalSize < m_TotalOutputContainerSize)) // overflow (wrap) #else if ((NewTotalSize > DXBC_MAX_SIZE_IN_BYTES) || // overflow (NewTotalSize < m_TotalOutputContainerSize)) // overflow (wrap) #endif { free(pNewBlobNode); return E_FAIL; } pNewBlobNode->BlobHeader.BlobFourCC = BlobFourCC; pNewBlobNode->BlobHeader.BlobSize = BlobSize; pNewBlobNode->pNext = NULL; if (BlobSize == 0) { pNewBlobNode->pBlobData = NULL; } else { if (m_bMakeInternalCopiesOfBlobs) { pNewBlobNode->pBlobData = (BlobNode *)malloc(BlobSize * sizeof(BYTE)); if (!pNewBlobNode->pBlobData) { free(pNewBlobNode); return E_OUTOFMEMORY; } // Copy the blob data memcpy((BYTE *)pNewBlobNode->pBlobData, pBlobData, BlobSize); } else { pNewBlobNode->pBlobData = pBlobData; } } // Blob is valid, add new node to list m_TotalOutputContainerSize = NewTotalSize; m_BlobCount++; if (m_pLastBlob) { m_pLastBlob->pNext = pNewBlobNode; } if (!m_pFirstBlob) { m_pFirstBlob = pNewBlobNode; } m_pLastBlob = pNewBlobNode; return S_OK; } //--------------------------------------------------------------------------------------------------------------------------------- // CDXBCBuilder::AppendBlob HRESULT CDXBCBuilder::AppendBlob(CDXBCParser *pParser, DXBCFourCC BlobFourCC) { if 
(!pParser) return E_FAIL; UINT blobIndex = pParser->FindNextMatchingBlob(BlobFourCC); if (blobIndex == DXBC_BLOB_NOT_FOUND) return S_FALSE; const void *pData = pParser->GetBlob(blobIndex); UINT cbData = pParser->GetBlobSize(blobIndex); if (!pData) return E_FAIL; return AppendBlob(BlobFourCC, cbData, pData); } //--------------------------------------------------------------------------------------------------------------------------------- // CDXBCBuilder::GetFinalDXBC HRESULT CDXBCBuilder::GetFinalDXBC(void *pCallerAllocatedMemory, UINT *pContainerSize) { if (!pCallerAllocatedMemory) { // Return how much memory the caller needs to allocate. if (pContainerSize) { *pContainerSize = m_TotalOutputContainerSize; return S_OK; } return E_FAIL; } if (!pContainerSize) { return E_FAIL; // Nothing to do. } if (*pContainerSize < m_TotalOutputContainerSize) { // not enough memory allocated, return 0 bytes written out. *pContainerSize = 0; return E_FAIL; } // Ok, we can write out the full container. DXBCHeader *pHeader = (DXBCHeader *)pCallerAllocatedMemory; UINT *pIndex = (UINT *)((BYTE *)pHeader + sizeof(DXBCHeader)); // skip past header DXBCBlobHeader *pNextBlobHeader = (DXBCBlobHeader *)((BYTE *)pIndex + m_BlobCount * sizeof(UINT)); // skip past index BYTE *pNextBlobData = (BYTE *)pNextBlobHeader + sizeof(DXBCBlobHeader); // skip past blob header BlobNode *pNextInputBlob = m_pFirstBlob; for (UINT b = 0; b < m_BlobCount; b++) { pIndex[b] = (UINT)((BYTE *)pNextBlobHeader - (BYTE *)pCallerAllocatedMemory); *pNextBlobHeader = pNextInputBlob->BlobHeader; memcpy(pNextBlobData, pNextInputBlob->pBlobData, pNextInputBlob->BlobHeader.BlobSize); // advance to next blob pNextBlobHeader = (DXBCBlobHeader *)(pNextBlobData + pNextInputBlob->BlobHeader.BlobSize); // skip past last blob's data pNextBlobData = (BYTE *)pNextBlobHeader + sizeof(DXBCBlobHeader); // skip past blob header pNextInputBlob = pNextInputBlob->pNext; } // Fill in the initial entries pHeader->BlobCount = m_BlobCount; 
pHeader->ContainerSizeInBytes = m_TotalOutputContainerSize; pHeader->DXBCHeaderFourCC = DXBC_FOURCC_NAME; pHeader->Version.Major = DXBC_MAJOR_VERSION; pHeader->Version.Minor = DXBC_MINOR_VERSION; //signing is left as a post processing step if needed return S_OK; } ================================================ FILE: src/Fence.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #include "pch.h" namespace D3D12TranslationLayer { Fence::Fence(ImmediateContext* pParent, FENCE_FLAGS Flags, UINT64 InitialValue) : DeviceChild(pParent) , m_bDeferredWaits((Flags & FENCE_FLAG_DEFERRED_WAITS) != 0) { D3D12_FENCE_FLAGS Flags12 = ((Flags & FENCE_FLAG_SHARED) ? D3D12_FENCE_FLAG_SHARED : D3D12_FENCE_FLAG_NONE) | ((Flags & FENCE_FLAG_SHARED_CROSS_ADAPTER) ? D3D12_FENCE_FLAG_SHARED_CROSS_ADAPTER : D3D12_FENCE_FLAG_NONE) | ((Flags & FENCE_FLAG_NON_MONITORED) ? D3D12_FENCE_FLAG_NON_MONITORED : D3D12_FENCE_FLAG_NONE); ThrowFailure(pParent->m_pDevice12->CreateFence(InitialValue, Flags12, IID_PPV_ARGS(&m_spFence))); } Fence::Fence(ImmediateContext* pParent, HANDLE hSharedHandle) : DeviceChild(pParent) { ThrowFailure(pParent->m_pDevice12->OpenSharedHandle(hSharedHandle, IID_PPV_ARGS(&m_spFence))); } Fence::Fence(ImmediateContext* pParent, ID3D12Fence* pFence) : DeviceChild(pParent) { ThrowFailure(pFence->QueryInterface(&m_spFence)); } Fence::~Fence() { AddToDeferredDeletionQueue(m_spFence); } HRESULT TRANSLATION_API Fence::CreateSharedHandle( _In_opt_ const SECURITY_ATTRIBUTES *pAttributes, _In_ DWORD dwAccess, _In_opt_ LPCWSTR lpName, _Out_ HANDLE *pHandle) { return m_pParent->m_pDevice12->CreateSharedHandle(m_spFence.get(), pAttributes, dwAccess, lpName, pHandle); } bool TRANSLATION_API Fence::IsMonitored() const { return (m_spFence->GetCreationFlags() & D3D12_FENCE_FLAG_NON_MONITORED) == 0; } } ================================================ FILE: src/FormatDescImpl.cpp 
================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #include #include #define R D3D11FCN_R #define G D3D11FCN_G #define B D3D11FCN_B #define A D3D11FCN_A #define D D3D11FCN_D #define S D3D11FCN_S #define X D3D11FCN_X #define _TYPELESS D3D11FCI_TYPELESS #define _FLOAT D3D11FCI_FLOAT #define _SNORM D3D11FCI_SNORM #define _UNORM D3D11FCI_UNORM #define _SINT D3D11FCI_SINT #define _UINT D3D11FCI_UINT #define _UNORM_SRGB D3D11FCI_UNORM_SRGB #define _FIXED_2_8 D3D11FCI_BIASED_FIXED_2_8 // -------------------------------------------------------------------------------------------------------------------------------- // Format Cast Sets const DXGI_FORMAT D3D11FCS_UNKNOWN[] = { DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_R32G32B32A32[] = { DXGI_FORMAT_R32G32B32A32_TYPELESS, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_UINT, DXGI_FORMAT_R32G32B32A32_SINT, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_R32G32B32[] = { DXGI_FORMAT_R32G32B32_TYPELESS, DXGI_FORMAT_R32G32B32_FLOAT, DXGI_FORMAT_R32G32B32_UINT, DXGI_FORMAT_R32G32B32_SINT, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_R16G16B16A16[] = { DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_SNORM, DXGI_FORMAT_R16G16B16A16_SINT, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_R32G32[] = { DXGI_FORMAT_R32G32_TYPELESS, DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_UINT, DXGI_FORMAT_R32G32_SINT, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_R32G8X24[] = { DXGI_FORMAT_R32G8X24_TYPELESS, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, 
DXGI_FORMAT_X32_TYPELESS_G8X24_UINT, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_R10G10B10A2[] = { DXGI_FORMAT_R10G10B10A2_TYPELESS, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UINT, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_R11G11B10[] = { DXGI_FORMAT_R11G11B10_FLOAT, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_R8G8B8A8[] = { DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_SNORM, DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_R16G16[] = { DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_R32[] = { DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_SINT, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_R24G8[] = { DXGI_FORMAT_R24G8_TYPELESS, DXGI_FORMAT_D24_UNORM_S8_UINT, DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_X24_TYPELESS_G8_UINT, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_R8G8[] = { DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_R16[] = { DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_D16_UNORM, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16_SINT, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null 
terminator" }; const DXGI_FORMAT D3D11FCS_R8[] = { DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8_SINT, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_A8[] = { DXGI_FORMAT_A8_UNORM, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_R1[] = { DXGI_FORMAT_R1_UNORM, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_R9G9B9E5[] = { DXGI_FORMAT_R9G9B9E5_SHAREDEXP, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_R8G8_B8G8[] = { DXGI_FORMAT_R8G8_B8G8_UNORM, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_G8R8_G8B8[] = { DXGI_FORMAT_G8R8_G8B8_UNORM, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_BC1[] = { DXGI_FORMAT_BC1_TYPELESS, DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM_SRGB, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_BC2[] = { DXGI_FORMAT_BC2_TYPELESS, DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM_SRGB, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_BC3[] = { DXGI_FORMAT_BC3_TYPELESS, DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM_SRGB, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_BC4[] = { DXGI_FORMAT_BC4_TYPELESS, DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_SNORM, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_BC5[] = { DXGI_FORMAT_BC5_TYPELESS, DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_SNORM, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_B5G6R5[] = { DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_UNKNOWN // not part of cast set, just 
the "null terminator" }; const DXGI_FORMAT D3D11FCS_B5G5R5A1[] = { DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_B8G8R8A8[] = { DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_B8G8R8X8[] = { DXGI_FORMAT_B8G8R8X8_UNORM, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_B8G8R8A8_Win7[] = { DXGI_FORMAT_B8G8R8A8_TYPELESS, DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_B8G8R8X8_Win7[] = { DXGI_FORMAT_B8G8R8X8_TYPELESS, DXGI_FORMAT_B8G8R8X8_UNORM, DXGI_FORMAT_B8G8R8X8_UNORM_SRGB, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_R10G10B10A2_XR[] = { DXGI_FORMAT_R10G10B10A2_TYPELESS, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UINT, DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_BC6H[] = { DXGI_FORMAT_BC6H_TYPELESS, DXGI_FORMAT_BC6H_UF16, DXGI_FORMAT_BC6H_SF16, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_BC7[] = { DXGI_FORMAT_BC7_TYPELESS, DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_UNORM_SRGB, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_AYUV[] = { DXGI_FORMAT_AYUV, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_NV12[] = { DXGI_FORMAT_NV12, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_YUY2[] = { DXGI_FORMAT_YUY2, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_P010[] = { DXGI_FORMAT_P010, DXGI_FORMAT_UNKNOWN // not 
part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_P016[] = { DXGI_FORMAT_P016, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_NV11[] = { DXGI_FORMAT_NV11, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_420_OPAQUE[] = { DXGI_FORMAT_420_OPAQUE, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_Y410[] = { DXGI_FORMAT_Y410, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_Y416[] = { DXGI_FORMAT_Y416, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_Y210[] = { DXGI_FORMAT_Y210, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_Y216[] = { DXGI_FORMAT_Y216, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_AI44[] = { DXGI_FORMAT_AI44, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_IA44[] = { DXGI_FORMAT_IA44, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_P8[] = { DXGI_FORMAT_P8, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_A8P8[] = { DXGI_FORMAT_A8P8, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_B4G4R4A4[] = { DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_P208[] = { DXGI_FORMAT_P208, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_V208[] = { DXGI_FORMAT_V208, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_V408[] = { DXGI_FORMAT_V408, DXGI_FORMAT_UNKNOWN // not part of cast 
set, just the "null terminator" }; const DXGI_FORMAT D3D11FCS_A4B4G4R4[] = { DXGI_FORMAT_A4B4G4R4_UNORM, DXGI_FORMAT_UNKNOWN // not part of cast set, just the "null terminator" }; // ---------------------------------------------------------------------------- // As much information about D3D10's interpretation of DXGI Resource Formats should be encoded in this // table, and everyone should query the information from here, be it for // specs or for code. // The new BitsPerUnit value represents two possible values. If the format is a block compressed format // then the value stored is bit per block. If the format is not a block compressed format then the value // represents bits per pixel. // ---------------------------------------------------------------------------- const CD3D11FormatHelper::FORMAT_DETAIL CD3D11FormatHelper::s_FormatDetail[] = { // DXGI_FORMAT ParentFormat pDefaultFormatCastSet BitsPerComponent[4], BitsPerUnit, SRGB, WidthAlignment, HeightAlignment, DepthAlignment, Layout, TypeLevel, ComponentName[4],ComponentInterpretation[4], bPlanar, bYUV {DXGI_FORMAT_UNKNOWN ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_R32G32B32A32_TYPELESS ,DXGI_FORMAT_R32G32B32A32_TYPELESS, D3D11FCS_R32G32B32A32, {32,32,32,32}, 128, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_PARTIAL_TYPE, R,G,B,A, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R32G32B32A32_FLOAT ,DXGI_FORMAT_R32G32B32A32_TYPELESS, D3D11FCS_R32G32B32A32, {32,32,32,32}, 128, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,A, _FLOAT, _FLOAT, _FLOAT, _FLOAT, FALSE, FALSE, }, { DXGI_FORMAT_R32G32B32A32_UINT ,DXGI_FORMAT_R32G32B32A32_TYPELESS, D3D11FCS_R32G32B32A32, {32,32,32,32}, 128, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,A, _UINT, _UINT, _UINT, _UINT, FALSE, FALSE, }, { DXGI_FORMAT_R32G32B32A32_SINT 
,DXGI_FORMAT_R32G32B32A32_TYPELESS, D3D11FCS_R32G32B32A32, {32,32,32,32}, 128, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,A, _SINT, _SINT, _SINT, _SINT, FALSE, FALSE, }, {DXGI_FORMAT_R32G32B32_TYPELESS ,DXGI_FORMAT_R32G32B32_TYPELESS, D3D11FCS_R32G32B32, {32,32,32,0}, 96, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_PARTIAL_TYPE, R,G,B,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R32G32B32_FLOAT ,DXGI_FORMAT_R32G32B32_TYPELESS, D3D11FCS_R32G32B32, {32,32,32,0}, 96, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,X, _FLOAT, _FLOAT, _FLOAT, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R32G32B32_UINT ,DXGI_FORMAT_R32G32B32_TYPELESS, D3D11FCS_R32G32B32, {32,32,32,0}, 96, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,X, _UINT, _UINT, _UINT, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R32G32B32_SINT ,DXGI_FORMAT_R32G32B32_TYPELESS, D3D11FCS_R32G32B32, {32,32,32,0}, 96, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,X, _SINT, _SINT, _SINT, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_R16G16B16A16_TYPELESS ,DXGI_FORMAT_R16G16B16A16_TYPELESS, D3D11FCS_R16G16B16A16, {16,16,16,16}, 64, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_PARTIAL_TYPE, R,G,B,A, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R16G16B16A16_FLOAT ,DXGI_FORMAT_R16G16B16A16_TYPELESS, D3D11FCS_R16G16B16A16, {16,16,16,16}, 64, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,A, _FLOAT, _FLOAT, _FLOAT, _FLOAT, FALSE, FALSE, }, { DXGI_FORMAT_R16G16B16A16_UNORM ,DXGI_FORMAT_R16G16B16A16_TYPELESS, D3D11FCS_R16G16B16A16, {16,16,16,16}, 64, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,A, _UNORM, _UNORM, _UNORM, _UNORM, FALSE, FALSE, }, { DXGI_FORMAT_R16G16B16A16_UINT ,DXGI_FORMAT_R16G16B16A16_TYPELESS, D3D11FCS_R16G16B16A16, {16,16,16,16}, 64, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,A, _UINT, _UINT, _UINT, _UINT, FALSE, FALSE, }, { DXGI_FORMAT_R16G16B16A16_SNORM 
,DXGI_FORMAT_R16G16B16A16_TYPELESS, D3D11FCS_R16G16B16A16, {16,16,16,16}, 64, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,A, _SNORM, _SNORM, _SNORM, _SNORM, FALSE, FALSE, }, { DXGI_FORMAT_R16G16B16A16_SINT ,DXGI_FORMAT_R16G16B16A16_TYPELESS, D3D11FCS_R16G16B16A16, {16,16,16,16}, 64, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,A, _SINT, _SINT, _SINT, _SINT, FALSE, FALSE, }, {DXGI_FORMAT_R32G32_TYPELESS ,DXGI_FORMAT_R32G32_TYPELESS, D3D11FCS_R32G32, {32,32,0,0}, 64, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_PARTIAL_TYPE, R,G,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R32G32_FLOAT ,DXGI_FORMAT_R32G32_TYPELESS, D3D11FCS_R32G32, {32,32,0,0}, 64, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,X,X, _FLOAT, _FLOAT, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R32G32_UINT ,DXGI_FORMAT_R32G32_TYPELESS, D3D11FCS_R32G32, {32,32,0,0}, 64, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,X,X, _UINT, _UINT, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R32G32_SINT ,DXGI_FORMAT_R32G32_TYPELESS, D3D11FCS_R32G32, {32,32,0,0}, 64, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,X,X, _SINT, _SINT, _TYPELESS, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_R32G8X24_TYPELESS ,DXGI_FORMAT_R32G8X24_TYPELESS, D3D11FCS_R32G8X24, {32,8,24,0}, 64, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_PARTIAL_TYPE, R,G,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_D32_FLOAT_S8X24_UINT ,DXGI_FORMAT_R32G8X24_TYPELESS, D3D11FCS_R32G8X24, {32,8,24,0}, 64, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, D,S,X,X, _FLOAT,_UINT,_TYPELESS,_TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS,DXGI_FORMAT_R32G8X24_TYPELESS, D3D11FCS_R32G8X24, {32,8,24,0}, 64, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,X,X,X, _FLOAT,_TYPELESS,_TYPELESS,_TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_X32_TYPELESS_G8X24_UINT ,DXGI_FORMAT_R32G8X24_TYPELESS, 
D3D11FCS_R32G8X24, {32,8,24,0}, 64, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, X,G,X,X, _TYPELESS,_UINT,_TYPELESS,_TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_R10G10B10A2_TYPELESS ,DXGI_FORMAT_R10G10B10A2_TYPELESS, D3D11FCS_R10G10B10A2_XR,{10,10,10,2}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_PARTIAL_TYPE, R,G,B,A, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R10G10B10A2_UNORM ,DXGI_FORMAT_R10G10B10A2_TYPELESS, D3D11FCS_R10G10B10A2_XR,{10,10,10,2}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,A, _UNORM, _UNORM, _UNORM, _UNORM, FALSE, FALSE, }, { DXGI_FORMAT_R10G10B10A2_UINT ,DXGI_FORMAT_R10G10B10A2_TYPELESS, D3D11FCS_R10G10B10A2_XR,{10,10,10,2}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,A, _UINT, _UINT, _UINT, _UINT, FALSE, FALSE, }, {DXGI_FORMAT_R11G11B10_FLOAT ,DXGI_FORMAT_R11G11B10_FLOAT, D3D11FCS_R11G11B10, {11,11,10,0}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,X, _FLOAT, _FLOAT, _FLOAT, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_R8G8B8A8_TYPELESS ,DXGI_FORMAT_R8G8B8A8_TYPELESS, D3D11FCS_R8G8B8A8, {8,8,8,8}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_PARTIAL_TYPE, R,G,B,A, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R8G8B8A8_UNORM ,DXGI_FORMAT_R8G8B8A8_TYPELESS, D3D11FCS_R8G8B8A8, {8,8,8,8}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,A, _UNORM, _UNORM, _UNORM, _UNORM, FALSE, FALSE, }, { DXGI_FORMAT_R8G8B8A8_UNORM_SRGB ,DXGI_FORMAT_R8G8B8A8_TYPELESS, D3D11FCS_R8G8B8A8, {8,8,8,8}, 32, TRUE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,A, _UNORM_SRGB, _UNORM_SRGB, _UNORM_SRGB, _UNORM_SRGB, FALSE, FALSE, }, { DXGI_FORMAT_R8G8B8A8_UINT ,DXGI_FORMAT_R8G8B8A8_TYPELESS, D3D11FCS_R8G8B8A8, {8,8,8,8}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,A, _UINT, _UINT, _UINT, _UINT, FALSE, FALSE, }, { DXGI_FORMAT_R8G8B8A8_SNORM ,DXGI_FORMAT_R8G8B8A8_TYPELESS, D3D11FCS_R8G8B8A8, {8,8,8,8}, 32, 
FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,A, _SNORM, _SNORM, _SNORM, _SNORM, FALSE, FALSE, }, { DXGI_FORMAT_R8G8B8A8_SINT ,DXGI_FORMAT_R8G8B8A8_TYPELESS, D3D11FCS_R8G8B8A8, {8,8,8,8}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,A, _SINT, _SINT, _SINT, _SINT, FALSE, FALSE, }, {DXGI_FORMAT_R16G16_TYPELESS ,DXGI_FORMAT_R16G16_TYPELESS, D3D11FCS_R16G16, {16,16,0,0}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_PARTIAL_TYPE, R,G,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R16G16_FLOAT ,DXGI_FORMAT_R16G16_TYPELESS, D3D11FCS_R16G16, {16,16,0,0}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,X,X, _FLOAT, _FLOAT, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R16G16_UNORM ,DXGI_FORMAT_R16G16_TYPELESS, D3D11FCS_R16G16, {16,16,0,0}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,X,X, _UNORM, _UNORM, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R16G16_UINT ,DXGI_FORMAT_R16G16_TYPELESS, D3D11FCS_R16G16, {16,16,0,0}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,X,X, _UINT, _UINT, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R16G16_SNORM ,DXGI_FORMAT_R16G16_TYPELESS, D3D11FCS_R16G16, {16,16,0,0}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,X,X, _SNORM, _SNORM, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R16G16_SINT ,DXGI_FORMAT_R16G16_TYPELESS, D3D11FCS_R16G16, {16,16,0,0}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,X,X, _SINT, _SINT, _TYPELESS, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_R32_TYPELESS ,DXGI_FORMAT_R32_TYPELESS, D3D11FCS_R32, {32,0,0,0}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_PARTIAL_TYPE, R,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_D32_FLOAT ,DXGI_FORMAT_R32_TYPELESS, D3D11FCS_R32, {32,0,0,0}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, D,X,X,X, _FLOAT, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { 
DXGI_FORMAT_R32_FLOAT ,DXGI_FORMAT_R32_TYPELESS, D3D11FCS_R32, {32,0,0,0}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,X,X,X, _FLOAT, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R32_UINT ,DXGI_FORMAT_R32_TYPELESS, D3D11FCS_R32, {32,0,0,0}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,X,X,X, _UINT, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R32_SINT ,DXGI_FORMAT_R32_TYPELESS, D3D11FCS_R32, {32,0,0,0}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,X,X,X, _SINT, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_R24G8_TYPELESS ,DXGI_FORMAT_R24G8_TYPELESS, D3D11FCS_R24G8, {24,8,0,0}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_PARTIAL_TYPE, R,G,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_D24_UNORM_S8_UINT ,DXGI_FORMAT_R24G8_TYPELESS, D3D11FCS_R24G8, {24,8,0,0}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, D,S,X,X, _UNORM,_UINT,_TYPELESS,_TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R24_UNORM_X8_TYPELESS ,DXGI_FORMAT_R24G8_TYPELESS, D3D11FCS_R24G8, {24,8,0,0}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,X,X,X, _UNORM,_TYPELESS,_TYPELESS,_TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_X24_TYPELESS_G8_UINT ,DXGI_FORMAT_R24G8_TYPELESS, D3D11FCS_R24G8, {24,8,0,0}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, X,G,X,X, _TYPELESS,_UINT,_TYPELESS,_TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_R8G8_TYPELESS ,DXGI_FORMAT_R8G8_TYPELESS, D3D11FCS_R8G8, {8,8,0,0}, 16, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_PARTIAL_TYPE, R,G,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R8G8_UNORM ,DXGI_FORMAT_R8G8_TYPELESS, D3D11FCS_R8G8, {8,8,0,0}, 16, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,X,X, _UNORM, _UNORM, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R8G8_UINT ,DXGI_FORMAT_R8G8_TYPELESS, D3D11FCS_R8G8, {8,8,0,0}, 16, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, 
R,G,X,X, _UINT, _UINT, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R8G8_SNORM ,DXGI_FORMAT_R8G8_TYPELESS, D3D11FCS_R8G8, {8,8,0,0}, 16, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,X,X, _SNORM, _SNORM, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R8G8_SINT ,DXGI_FORMAT_R8G8_TYPELESS, D3D11FCS_R8G8, {8,8,0,0}, 16, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,X,X, _SINT, _SINT, _TYPELESS, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_R16_TYPELESS ,DXGI_FORMAT_R16_TYPELESS, D3D11FCS_R16, {16,0,0,0}, 16, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_PARTIAL_TYPE, R,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R16_FLOAT ,DXGI_FORMAT_R16_TYPELESS, D3D11FCS_R16, {16,0,0,0}, 16, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,X,X,X, _FLOAT, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_D16_UNORM ,DXGI_FORMAT_R16_TYPELESS, D3D11FCS_R16, {16,0,0,0}, 16, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, D,X,X,X, _UNORM, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R16_UNORM ,DXGI_FORMAT_R16_TYPELESS, D3D11FCS_R16, {16,0,0,0}, 16, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,X,X,X, _UNORM, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R16_UINT ,DXGI_FORMAT_R16_TYPELESS, D3D11FCS_R16, {16,0,0,0}, 16, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,X,X,X, _UINT, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R16_SNORM ,DXGI_FORMAT_R16_TYPELESS, D3D11FCS_R16, {16,0,0,0}, 16, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,X,X,X, _SNORM, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R16_SINT ,DXGI_FORMAT_R16_TYPELESS, D3D11FCS_R16, {16,0,0,0}, 16, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,X,X,X, _SINT, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_R8_TYPELESS ,DXGI_FORMAT_R8_TYPELESS, D3D11FCS_R8, {8,0,0,0}, 8, FALSE, 1, 1, 1, D3D11FL_STANDARD, 
D3D11FTL_PARTIAL_TYPE, R,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R8_UNORM ,DXGI_FORMAT_R8_TYPELESS, D3D11FCS_R8, {8,0,0,0}, 8, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,X,X,X, _UNORM, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R8_UINT ,DXGI_FORMAT_R8_TYPELESS, D3D11FCS_R8, {8,0,0,0}, 8, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,X,X,X, _UINT, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R8_SNORM ,DXGI_FORMAT_R8_TYPELESS, D3D11FCS_R8, {8,0,0,0}, 8, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,X,X,X, _SNORM, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_R8_SINT ,DXGI_FORMAT_R8_TYPELESS, D3D11FCS_R8, {8,0,0,0}, 8, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,X,X,X, _SINT, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_A8_UNORM ,DXGI_FORMAT_A8_UNORM, D3D11FCS_A8, {0,0,0,8}, 8, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, X,X,X,A, _TYPELESS, _TYPELESS, _TYPELESS, _UNORM, FALSE, FALSE, }, {DXGI_FORMAT_R1_UNORM ,DXGI_FORMAT_R1_UNORM, D3D11FCS_R1, {1,0,0,0}, 1, FALSE, 8, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,X,X,X, _UNORM, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_R9G9B9E5_SHAREDEXP ,DXGI_FORMAT_R9G9B9E5_SHAREDEXP, D3D11FCS_R9G9B9E5, {0,0,0,0}, 32, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,B,X, _FLOAT, _FLOAT, _FLOAT, _FLOAT, FALSE, FALSE, }, {DXGI_FORMAT_R8G8_B8G8_UNORM ,DXGI_FORMAT_R8G8_B8G8_UNORM, D3D11FCS_R8G8_B8G8, {0,0,0,0}, 16, FALSE, 2, 1, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,B,X, _UNORM, _UNORM, _UNORM, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_G8R8_G8B8_UNORM ,DXGI_FORMAT_G8R8_G8B8_UNORM, D3D11FCS_G8R8_G8B8, {0,0,0,0}, 16, FALSE, 2, 1, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,B,X, _UNORM, _UNORM, _UNORM, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_BC1_TYPELESS ,DXGI_FORMAT_BC1_TYPELESS, D3D11FCS_BC1, {0,0,0,0}, 64, FALSE, 4, 4, 1, D3D11FL_CUSTOM, 
D3D11FTL_PARTIAL_TYPE, R,G,B,A, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_BC1_UNORM ,DXGI_FORMAT_BC1_TYPELESS, D3D11FCS_BC1, {0,0,0,0}, 64, FALSE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,B,A, _UNORM, _UNORM, _UNORM, _UNORM, FALSE, FALSE, }, { DXGI_FORMAT_BC1_UNORM_SRGB ,DXGI_FORMAT_BC1_TYPELESS, D3D11FCS_BC1, {0,0,0,0}, 64, TRUE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,B,A, _UNORM_SRGB, _UNORM_SRGB, _UNORM_SRGB, _UNORM, FALSE, FALSE, }, {DXGI_FORMAT_BC2_TYPELESS ,DXGI_FORMAT_BC2_TYPELESS, D3D11FCS_BC2, {0,0,0,0}, 128, FALSE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_PARTIAL_TYPE, R,G,B,A, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_BC2_UNORM ,DXGI_FORMAT_BC2_TYPELESS, D3D11FCS_BC2, {0,0,0,0}, 128, FALSE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,B,A, _UNORM, _UNORM, _UNORM, _UNORM, FALSE, FALSE, }, { DXGI_FORMAT_BC2_UNORM_SRGB ,DXGI_FORMAT_BC2_TYPELESS, D3D11FCS_BC2, {0,0,0,0}, 128, TRUE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,B,A, _UNORM_SRGB, _UNORM_SRGB, _UNORM_SRGB, _UNORM, FALSE, FALSE, }, {DXGI_FORMAT_BC3_TYPELESS ,DXGI_FORMAT_BC3_TYPELESS, D3D11FCS_BC3, {0,0,0,0}, 128, FALSE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_PARTIAL_TYPE, R,G,B,A, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_BC3_UNORM ,DXGI_FORMAT_BC3_TYPELESS, D3D11FCS_BC3, {0,0,0,0}, 128, FALSE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,B,A, _UNORM, _UNORM, _UNORM, _UNORM, FALSE, FALSE, }, { DXGI_FORMAT_BC3_UNORM_SRGB ,DXGI_FORMAT_BC3_TYPELESS, D3D11FCS_BC3, {0,0,0,0}, 128, TRUE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,B,A, _UNORM_SRGB, _UNORM_SRGB, _UNORM_SRGB, _UNORM, FALSE, FALSE, }, {DXGI_FORMAT_BC4_TYPELESS ,DXGI_FORMAT_BC4_TYPELESS, D3D11FCS_BC4, {0,0,0,0}, 64, FALSE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_PARTIAL_TYPE, R,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_BC4_UNORM ,DXGI_FORMAT_BC4_TYPELESS, D3D11FCS_BC4, {0,0,0,0}, 64, 
FALSE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,X,X,X, _UNORM, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_BC4_SNORM ,DXGI_FORMAT_BC4_TYPELESS, D3D11FCS_BC4, {0,0,0,0}, 64, FALSE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,X,X,X, _SNORM, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_BC5_TYPELESS ,DXGI_FORMAT_BC5_TYPELESS, D3D11FCS_BC5, {0,0,0,0}, 128, FALSE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_PARTIAL_TYPE, R,G,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_BC5_UNORM ,DXGI_FORMAT_BC5_TYPELESS, D3D11FCS_BC5, {0,0,0,0}, 128, FALSE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,X,X, _UNORM, _UNORM, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_BC5_SNORM ,DXGI_FORMAT_BC5_TYPELESS, D3D11FCS_BC5, {0,0,0,0}, 128, FALSE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,X,X, _SNORM, _SNORM, _TYPELESS, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_B5G6R5_UNORM ,DXGI_FORMAT_B5G6R5_UNORM, D3D11FCS_B5G6R5, {5,6,5,0}, 16, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, B,G,R,X, _UNORM, _UNORM, _UNORM, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_B5G5R5A1_UNORM ,DXGI_FORMAT_B5G5R5A1_UNORM, D3D11FCS_B5G5R5A1, {5,5,5,1}, 16, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, B,G,R,A, _UNORM, _UNORM, _UNORM, _UNORM, FALSE, FALSE, }, {DXGI_FORMAT_B8G8R8A8_UNORM ,DXGI_FORMAT_B8G8R8A8_TYPELESS, D3D11FCS_B8G8R8A8_Win7, {8,8,8,8}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, B,G,R,A, _UNORM, _UNORM, _UNORM, _UNORM, FALSE, FALSE, }, {DXGI_FORMAT_B8G8R8X8_UNORM ,DXGI_FORMAT_B8G8R8X8_TYPELESS, D3D11FCS_B8G8R8X8_Win7, {8,8,8,8}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, B,G,R,X, _UNORM, _UNORM, _UNORM, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM ,DXGI_FORMAT_R10G10B10A2_TYPELESS, D3D11FCS_R10G10B10A2_XR,{10,10,10,2}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, R,G,B,A, _FIXED_2_8, _FIXED_2_8, _FIXED_2_8, _UNORM, FALSE, FALSE, }, 
{DXGI_FORMAT_B8G8R8A8_TYPELESS ,DXGI_FORMAT_B8G8R8A8_TYPELESS, D3D11FCS_B8G8R8A8_Win7, {8,8,8,8}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_PARTIAL_TYPE, B,G,R,A, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_B8G8R8A8_UNORM_SRGB ,DXGI_FORMAT_B8G8R8A8_TYPELESS, D3D11FCS_B8G8R8A8_Win7, {8,8,8,8}, 32, TRUE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, B,G,R,A, _UNORM_SRGB, _UNORM_SRGB, _UNORM_SRGB, _UNORM_SRGB, FALSE, FALSE, }, {DXGI_FORMAT_B8G8R8X8_TYPELESS ,DXGI_FORMAT_B8G8R8X8_TYPELESS, D3D11FCS_B8G8R8X8_Win7, {8,8,8,8}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_PARTIAL_TYPE, B,G,R,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_B8G8R8X8_UNORM_SRGB ,DXGI_FORMAT_B8G8R8X8_TYPELESS, D3D11FCS_B8G8R8X8_Win7, {8,8,8,8}, 32, TRUE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, B,G,R,X, _UNORM_SRGB, _UNORM_SRGB, _UNORM_SRGB, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_BC6H_TYPELESS ,DXGI_FORMAT_BC6H_TYPELESS, D3D11FCS_BC6H, {0,0,0,0}, 128, FALSE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_PARTIAL_TYPE, R,G,B,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_BC6H_UF16 ,DXGI_FORMAT_BC6H_TYPELESS, D3D11FCS_BC6H, {0,0,0,0}, 128, FALSE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,B,X, _FLOAT, _FLOAT, _FLOAT, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_BC6H_SF16 ,DXGI_FORMAT_BC6H_TYPELESS, D3D11FCS_BC6H, {0,0,0,0}, 128, FALSE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,B,X, _FLOAT, _FLOAT, _FLOAT, _TYPELESS, FALSE, FALSE, }, {DXGI_FORMAT_BC7_TYPELESS ,DXGI_FORMAT_BC7_TYPELESS, D3D11FCS_BC7, {0,0,0,0}, 128, FALSE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_PARTIAL_TYPE, R,G,B,A, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_BC7_UNORM ,DXGI_FORMAT_BC7_TYPELESS, D3D11FCS_BC7, {0,0,0,0}, 128, FALSE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,B,A, _UNORM, _UNORM, _UNORM, _UNORM, FALSE, FALSE, }, { DXGI_FORMAT_BC7_UNORM_SRGB ,DXGI_FORMAT_BC7_TYPELESS, D3D11FCS_BC7, 
{0,0,0,0}, 128, TRUE, 4, 4, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,B,A, _UNORM_SRGB, _UNORM_SRGB, _UNORM_SRGB, _UNORM, FALSE, FALSE, }, // YUV 4:4:4 formats { DXGI_FORMAT_AYUV ,DXGI_FORMAT_AYUV, D3D11FCS_AYUV, {8,8,8,8}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, B,G,R,A, _UNORM, _UNORM, _UNORM, _UNORM, FALSE, TRUE, }, { DXGI_FORMAT_Y410 ,DXGI_FORMAT_Y410, D3D11FCS_Y410, {10,10,10,2}, 32, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, B,G,R,A, _UNORM, _UNORM, _UNORM, _UNORM, FALSE, TRUE, }, { DXGI_FORMAT_Y416 ,DXGI_FORMAT_Y416, D3D11FCS_Y416, {16,16,16,16}, 64, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, B,G,R,A, _UNORM, _UNORM, _UNORM, _UNORM, FALSE, TRUE, }, // YUV 4:2:0 formats { DXGI_FORMAT_NV12 ,DXGI_FORMAT_NV12, D3D11FCS_NV12, {0,0,0,0}, 8, FALSE, 2, 2, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,X,X,X, _UNORM, _TYPELESS, _TYPELESS, _TYPELESS, TRUE, TRUE, }, { DXGI_FORMAT_P010 ,DXGI_FORMAT_P010, D3D11FCS_P010, {0,0,0,0}, 16, FALSE, 2, 2, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,X,X,X, _UNORM, _TYPELESS, _TYPELESS, _TYPELESS, TRUE, TRUE, }, { DXGI_FORMAT_P016 ,DXGI_FORMAT_P016, D3D11FCS_P016, {0,0,0,0}, 16, FALSE, 2, 2, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,X,X,X, _UNORM, _TYPELESS, _TYPELESS, _TYPELESS, TRUE, TRUE, }, { DXGI_FORMAT_420_OPAQUE ,DXGI_FORMAT_420_OPAQUE, D3D11FCS_420_OPAQUE, {0,0,0,0}, 8, FALSE, 2, 2, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,X,X,X, _UNORM, _TYPELESS, _TYPELESS, _TYPELESS, TRUE, TRUE, }, // YUV 4:2:2 formats { DXGI_FORMAT_YUY2 ,DXGI_FORMAT_YUY2, D3D11FCS_YUY2, {0,0,0,0}, 16, FALSE, 2, 1, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,B,X, _UNORM, _UNORM, _UNORM, _TYPELESS, FALSE, TRUE, }, { DXGI_FORMAT_Y210 ,DXGI_FORMAT_Y210, D3D11FCS_Y210, {0,0,0,0}, 32, FALSE, 2, 1, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,B,X, _UNORM, _UNORM, _UNORM, _TYPELESS, FALSE, TRUE, }, { DXGI_FORMAT_Y216 ,DXGI_FORMAT_Y216, D3D11FCS_Y216, {0,0,0,0}, 32, FALSE, 2, 1, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,G,B,X, _UNORM, 
_UNORM, _UNORM, _TYPELESS, FALSE, TRUE, }, // YUV 4:1:1 formats { DXGI_FORMAT_NV11 ,DXGI_FORMAT_NV11, D3D11FCS_NV11, {0,0,0,0}, 8, FALSE, 4, 1, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,X,X,X, _UNORM, _TYPELESS, _TYPELESS, _TYPELESS, TRUE, TRUE, }, // Legacy substream formats { DXGI_FORMAT_AI44 ,DXGI_FORMAT_AI44, D3D11FCS_AI44, {0,0,0,0}, 8, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,X,X,X, _UNORM, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, TRUE, }, { DXGI_FORMAT_IA44 ,DXGI_FORMAT_IA44, D3D11FCS_IA44, {0,0,0,0}, 8, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,X,X,X, _UNORM, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, TRUE, }, { DXGI_FORMAT_P8 ,DXGI_FORMAT_P8, D3D11FCS_P8, {0,0,0,0}, 8, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,X,X,X, _UNORM, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, TRUE, }, { DXGI_FORMAT_A8P8 ,DXGI_FORMAT_A8P8, D3D11FCS_A8P8, {0,0,0,0}, 16, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_FULL_TYPE, R,X,X,X, _UNORM, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, TRUE, }, // { DXGI_FORMAT_B4G4R4A4_UNORM ,DXGI_FORMAT_B4G4R4A4_UNORM, D3D11FCS_B4G4R4A4, {4,4,4,4}, 16, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, B,G,R,A, _UNORM, _UNORM, _UNORM, _UNORM, FALSE, FALSE, }, { DXGI_FORMAT(116) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(117) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(118) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(119) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { 
DXGI_FORMAT(120) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(121) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(122) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(123) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(124) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(125) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(126) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(127) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(128) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(129) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(130) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 
0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(131) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(132) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(133) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(134) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(135) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(136) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(137) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(138) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(139) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(140) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, 
_TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(141) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(142) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(143) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(144) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(145) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(146) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(147) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(148) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(149) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(150) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { 
DXGI_FORMAT(151) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(152) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(153) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(154) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(155) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(156) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(157) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(158) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(159) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(160) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(161) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 
0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(162) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(163) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(164) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(165) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(166) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(167) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(168) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(169) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(170) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(171) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, 
_TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(172) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(173) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(174) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(175) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(176) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(177) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(178) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(179) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(180) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(181) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { 
DXGI_FORMAT(182) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(183) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(184) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(185) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(186) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(187) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(188) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(189) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT(190) ,DXGI_FORMAT_UNKNOWN, D3D11FCS_UNKNOWN, {0,0,0,0}, 0, FALSE, 1, 1, 1, D3D11FL_CUSTOM, D3D11FTL_NO_TYPE, X,X,X,X, _TYPELESS, _TYPELESS, _TYPELESS, _TYPELESS, FALSE, FALSE, }, { DXGI_FORMAT_A4B4G4R4_UNORM ,DXGI_FORMAT_A4B4G4R4_UNORM, D3D11FCS_A4B4G4R4, {4,4,4,4}, 16, FALSE, 1, 1, 1, D3D11FL_STANDARD, D3D11FTL_FULL_TYPE, A,B,G,R, _UNORM, _UNORM, _UNORM, _UNORM, FALSE, FALSE, }, }; const UINT CD3D11FormatHelper::s_NumFormats = 
(sizeof(CD3D11FormatHelper::s_FormatDetail)/sizeof(CD3D11FormatHelper::FORMAT_DETAIL)); #if VALIDATE_FORMAT_ORDER #define FR( Format, A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z,AA,AB,AC,AD,AE,AF,AG,AH ) { Format, A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z,AA,AB,AC,AD,AE,AF,AG,AH } #else #define FR( Format, A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z,AA,AB,AC,AD,AE,AF,AG,AH ) { A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z,AA,AB,AC,AD,AE,AF,AG,AH } #endif //--------------------------------------------------------------------------------------------------------------------------------- // GetDetailTableIndex UINT CD3D11FormatHelper::GetDetailTableIndex(DXGI_FORMAT Format ) { if( (UINT)Format < ARRAYSIZE( s_FormatDetail ) ) { assert( s_FormatDetail[(UINT)Format].DXGIFormat == Format ); return static_cast(Format); } return (UINT)-1; } //--------------------------------------------------------------------------------------------------------------------------------- // IsBlockCompressFormat - returns true if format is block compressed. This function is a helper function for GetBitsPerUnit and // if this function returns true then GetBitsPerUnit returns block size. 
bool CD3D11FormatHelper::IsBlockCompressFormat(DXGI_FORMAT Format) { // Returns true if BC1, BC2, BC3, BC4, BC5, BC6, BC7, or ASTC return (Format >= DXGI_FORMAT_BC1_TYPELESS && Format <= DXGI_FORMAT_BC5_SNORM) || (Format >= DXGI_FORMAT_BC6H_TYPELESS && Format <= DXGI_FORMAT_BC7_UNORM_SRGB); } //--------------------------------------------------------------------------------------------------------------------------------- // GetByteAlignment UINT CD3D11FormatHelper::GetByteAlignment(DXGI_FORMAT Format) { UINT bits = GetBitsPerUnit(Format); if (!IsBlockCompressFormat(Format)) { bits *= GetWidthAlignment(Format)*GetHeightAlignment(Format)*GetDepthAlignment(Format); } assert((bits & 0x7) == 0); // Unit must be byte-aligned return bits >> 3; } //---------------------------------------------------------------------------- // DivideAndRoundUp inline HRESULT DivideAndRoundUp(UINT dividend, UINT divisor, _Out_ UINT& result) { HRESULT hr = S_OK; UINT adjustedDividend; hr = UIntAdd(dividend, (divisor - 1), &adjustedDividend); result = SUCCEEDED(hr) ? (adjustedDividend / divisor) : 0; return hr; } //---------------------------------------------------------------------------- // CalculateExtraPlanarRows HRESULT CD3D11FormatHelper::CalculateExtraPlanarRows( DXGI_FORMAT format, UINT plane0Height, _Out_ UINT& totalHeight ) { // blockWidth, blockHeight, and blockSize only reflect the size of plane 0. Each planar format has additonal planes that must // be counted. Each format increases size by another 0.5x, 1x, or 2x. Grab the number of "half allocation" increments so integer // math can be used to calculate the extra size. 
UINT extraHalfHeight = 0; UINT round = 0; switch (format) { case DXGI_FORMAT_NV12: case DXGI_FORMAT_P010: case DXGI_FORMAT_P016: case DXGI_FORMAT_420_OPAQUE: extraHalfHeight = 1; round = 1; break; case DXGI_FORMAT_NV11: case DXGI_FORMAT_P208: extraHalfHeight = 2; round = 0; break; case DXGI_FORMAT_V208: extraHalfHeight = 2; round = 1; break; case DXGI_FORMAT_V408: extraHalfHeight = 4; round = 0; break; default: // Unhandled planar format. assert(false); break; } UINT extraPlaneHeight; if (FAILED(UIntMult(plane0Height, extraHalfHeight, &extraPlaneHeight)) || FAILED(UIntAdd(extraPlaneHeight, round, &extraPlaneHeight)) || FAILED(UIntAdd(plane0Height, (extraPlaneHeight >> 1), &totalHeight))) { return INTSAFE_E_ARITHMETIC_OVERFLOW; } return S_OK; } //---------------------------------------------------------------------------- // CalculateResourceSize HRESULT CD3D11FormatHelper::CalculateResourceSize( UINT width, UINT height, UINT depth, DXGI_FORMAT format, UINT mipLevels, UINT subresources, _Out_ SIZE_T& totalByteSize, _Out_writes_opt_(subresources) D3D11_MAPPED_SUBRESOURCE *pDst) { UINT tableIndex = GetDetailTableIndexNoThrow( format ); const FORMAT_DETAIL& formatDetail = s_FormatDetail[tableIndex]; bool fIsBlockCompressedFormat = IsBlockCompressFormat(format ); // No format currently requires depth alignment. assert(formatDetail.DepthAlignment == 1); UINT subWidth = width; UINT subHeight = height; UINT subDepth = depth; for (UINT s = 0, iM = 0, iA = 0; s < subresources; ++s) { UINT blockWidth; if (FAILED(DivideAndRoundUp(subWidth, formatDetail.WidthAlignment, /*_Out_*/ blockWidth))) { return INTSAFE_E_ARITHMETIC_OVERFLOW; } UINT blockSize, blockHeight; if (fIsBlockCompressedFormat) { if (FAILED(DivideAndRoundUp(subHeight, formatDetail.HeightAlignment, /*_Out_*/ blockHeight))) { return INTSAFE_E_ARITHMETIC_OVERFLOW; } // Block Compressed formats use BitsPerUnit as block size. 
blockSize = formatDetail.BitsPerUnit; } else { // The height must *not* be aligned to HeightAlign. As there is no plane pitch/stride, the expectation is that the 2nd plane // begins immediately after the first. The only formats with HeightAlignment other than 1 are planar or block compressed, and // block compressed is handled above. assert(formatDetail.bPlanar || formatDetail.HeightAlignment == 1); blockHeight = subHeight; // Combined with the division os subWidth by the width alignment above, this helps achieve rounding the stride up to an even multiple of // block width. This is especially important for formats like NV12 and P208 whose chroma plane is wider than the luma. blockSize = formatDetail.BitsPerUnit * formatDetail.WidthAlignment; } if (DXGI_FORMAT_UNKNOWN == formatDetail.DXGIFormat) { blockSize = 8; } // Convert block width size to bytes. assert((blockSize & 0x7) == 0); blockSize = blockSize >> 3; if (formatDetail.bPlanar) { if (FAILED(CalculateExtraPlanarRows(format, blockHeight, /*_Out_*/ blockHeight))) { return INTSAFE_E_ARITHMETIC_OVERFLOW; } } // Calculate rowPitch, depthPitch, and total subresource size. UINT rowPitch, depthPitch; SIZE_T subresourceByteSize; if ( FAILED(UIntMult(blockWidth, blockSize, &rowPitch)) || FAILED(UIntMult(blockHeight, rowPitch, &depthPitch)) || FAILED(SIZETMult(subDepth, depthPitch, &subresourceByteSize))) { return INTSAFE_E_ARITHMETIC_OVERFLOW; } if (pDst) { D3D11_MAPPED_SUBRESOURCE& dst = pDst[s]; // This data will be returned straight from the API to satisfy Map. So, strides/ alignment must be API-correct. dst.pData = reinterpret_cast(totalByteSize); assert(s != 0 || dst.pData == NULL); dst.RowPitch = rowPitch; dst.DepthPitch = depthPitch; } // Align the subresource size. 
static_assert((MAP_ALIGN_REQUIREMENT & (MAP_ALIGN_REQUIREMENT - 1)) == 0, "This code expects MAP_ALIGN_REQUIREMENT to be a power of 2."); SIZE_T subresourceByteSizeAligned; if (FAILED(SIZETAdd(subresourceByteSize, MAP_ALIGN_REQUIREMENT - 1, &subresourceByteSizeAligned))) { return INTSAFE_E_ARITHMETIC_OVERFLOW; } subresourceByteSizeAligned = subresourceByteSizeAligned & ~(MAP_ALIGN_REQUIREMENT - 1); if (FAILED(SIZETAdd(totalByteSize, subresourceByteSizeAligned, &totalByteSize))) { return INTSAFE_E_ARITHMETIC_OVERFLOW; } // Iterate over mip levels and array elements if (++iM >= mipLevels) { ++iA; iM = 0; subWidth = width; subHeight = height; subDepth = depth; } else { subWidth /= (1 == subWidth ? 1 : 2); subHeight /= (1 == subHeight ? 1 : 2); subDepth /= (1 == subDepth ? 1 : 2); } } return S_OK; } inline bool IsPow2( UINT Val ) { return 0 == (Val & (Val - 1)); } // This helper function calculates the Row Pitch for a given format. For Planar formats this function returns // the row major RowPitch of the resource. The RowPitch is the same for all the planes. For Planar // also use the CalculateExtraPlanarRows function to calculate the corresonding height or use the CalculateMinimumRowMajorSlicePitch // function. For Block Compressed Formats, this function returns the RowPitch of a row of blocks. For packed subsampled formats and other formats, // this function returns the row pitch of one single row of pixels. HRESULT CD3D11FormatHelper::CalculateMinimumRowMajorRowPitch(DXGI_FORMAT Format, UINT Width, _Out_ UINT &RowPitch) { // Early out for DXGI_FORMAT_UNKNOWN special case. 
if (Format == DXGI_FORMAT_UNKNOWN) { RowPitch = Width; return S_OK; } UINT WidthAlignment = GetWidthAlignment(Format); UINT NumUnits; if (IsBlockCompressFormat(Format)) { // This function calculates the minimum stride needed for a block row when the format // is block compressed.The GetBitsPerUnit value stored in the format table indicates // the size of a compressed block for block compressed formats. assert(WidthAlignment != 0); if (FAILED(DivideAndRoundUp(Width, WidthAlignment, NumUnits))) { return INTSAFE_E_ARITHMETIC_OVERFLOW; } } else { // All other formats must have strides aligned to their width alignment requirements. // The Width may not be aligned to the WidthAlignment. This is not an error for this // function as we expect to allow formats like NV12 to have odd dimensions in the future. // The following alignement code expects only pow2 alignment requirements. Only block // compressed formats currently have non-pow2 alignment requriements. assert(IsPow2(WidthAlignment)); UINT Mask = WidthAlignment - 1; if (FAILED(UIntAdd(Width, Mask, &NumUnits))) { return INTSAFE_E_ARITHMETIC_OVERFLOW; } NumUnits &= ~Mask; } if (FAILED(UIntMult(NumUnits, GetBitsPerUnit(Format), &RowPitch))) { return INTSAFE_E_ARITHMETIC_OVERFLOW; } // This must to always be Byte aligned. assert((RowPitch & 7) == 0); RowPitch >>= 3; return S_OK; } // This helper function calculates the SlicePitch for a given format. For Planar formats the slice pitch includes the extra // planes. 
HRESULT CD3D11FormatHelper::CalculateMinimumRowMajorSlicePitch(DXGI_FORMAT Format, UINT TightRowPitch, UINT Height, _Out_ UINT &SlicePitch) { if (Planar(Format)) { UINT PlanarHeight; if (FAILED(CalculateExtraPlanarRows(Format, Height, PlanarHeight))) { return INTSAFE_E_ARITHMETIC_OVERFLOW; } return UIntMult(TightRowPitch, PlanarHeight, &SlicePitch); } else if (Format == DXGI_FORMAT_UNKNOWN) { return UIntMult(TightRowPitch, Height, &SlicePitch); } UINT HeightAlignment = GetHeightAlignment(Format); // Caution assert to make sure that no new format breaks this assumtion that all HeightAlignment formats are BC or Planar. // This is to make sure that Height handled correctly for this calculation. assert(HeightAlignment == 1 || IsBlockCompressFormat(Format)); UINT HeightOfPacked; if (FAILED(DivideAndRoundUp(Height, HeightAlignment, HeightOfPacked))) { return INTSAFE_E_ARITHMETIC_OVERFLOW; } // Multiply by BitsPerUnit which for non block compressed formats has to be the bits per block and for all other non-planar formats // is bits per pixel. if (FAILED(UIntMult(HeightOfPacked, TightRowPitch, &SlicePitch))) { return INTSAFE_E_ARITHMETIC_OVERFLOW; } return S_OK; } //--------------------------------------------------------------------------------------------------------------------------------- // GetBitsPerUnit - returns bits per pixel unless format is a block compress format then it returns bits per block. // use IsBlockCompressFormat() to determine if block size is returned. UINT CD3D11FormatHelper::GetBitsPerUnit(DXGI_FORMAT Format) { return s_FormatDetail[GetDetailTableIndexNoThrow( Format )].BitsPerUnit; } //--------------------------------------------------------------------------------------------------------------------------------- // GetBitsPerElement legacy function used to maintain 10on9 only. Do not use. 
UINT CD3D11FormatHelper::GetBitsPerElement(DXGI_FORMAT Format)
{
    const UINT unitBits = s_FormatDetail[GetDetailTableIndexNoThrow( Format )].BitsPerUnit;
    if (!IsBlockCompressFormat( Format ))
    {
        return unitBits;
    }

    // For block compressed formats the table stores bits per block; spread them over the block's footprint.
    const UINT texelsPerBlock = GetWidthAlignment( Format ) * GetHeightAlignment( Format );
    return unitBits / texelsPerBlock;
}

//---------------------------------------------------------------------------------------------------------------------------------
// GetDetailTableIndexNoThrow - same lookup as GetDetailTableIndex, but asserts that the format was found.
UINT CD3D11FormatHelper::GetDetailTableIndexNoThrow(DXGI_FORMAT Format)
{
    const UINT tableIndex = GetDetailTableIndex( Format );
    assert( tableIndex != (UINT)-1 ); // Needs to be validated externally.
    return tableIndex;
}

//---------------------------------------------------------------------------------------------------------------------------------
// GetNumComponentsInFormat - number of the four component slots whose name is not D3D11FCN_X.
UINT CD3D11FormatHelper::GetNumComponentsInFormat( DXGI_FORMAT Format )
{
    const UINT tableIndex = GetDetailTableIndexNoThrow(Format);

    UINT used = 0;
    if( s_FormatDetail[tableIndex].ComponentName0 != D3D11FCN_X ) { ++used; }
    if( s_FormatDetail[tableIndex].ComponentName1 != D3D11FCN_X ) { ++used; }
    if( s_FormatDetail[tableIndex].ComponentName2 != D3D11FCN_X ) { ++used; }
    if( s_FormatDetail[tableIndex].ComponentName3 != D3D11FCN_X ) { ++used; }
    return used;
}

//---------------------------------------------------------------------------------------------------------------------------------
// Per-axis alignment accessors: each returns the corresponding column of the format's table row.
UINT CD3D11FormatHelper::GetWidthAlignment(DXGI_FORMAT Format)
{
    const UINT tableIndex = GetDetailTableIndexNoThrow( Format );
    return s_FormatDetail[tableIndex].WidthAlignment;
}

UINT CD3D11FormatHelper::GetHeightAlignment(DXGI_FORMAT Format)
{
    const UINT tableIndex = GetDetailTableIndexNoThrow( Format );
    return s_FormatDetail[tableIndex].HeightAlignment;
}

UINT CD3D11FormatHelper::GetDepthAlignment(DXGI_FORMAT Format)
{
    const UINT tableIndex = GetDetailTableIndexNoThrow( Format );
    return s_FormatDetail[tableIndex].DepthAlignment;
}
//--------------------------------------------------------------------------------------------------------------------------------- // GetFormatDetail const CD3D11FormatHelper::FORMAT_DETAIL* CD3D11FormatHelper::GetFormatDetail( DXGI_FORMAT Format ) { const UINT Index = GetDetailTableIndex(Format); if( -1 == Index ) { return NULL; } return &s_FormatDetail[ Index ]; } //--------------------------------------------------------------------------------------------------------------------------------- // IsSRGBFormat bool CD3D11FormatHelper::IsSRGBFormat(DXGI_FORMAT Format) { const UINT Index = GetDetailTableIndex(Format); if( -1 == Index ) { return false; } return s_FormatDetail[Index].SRGBFormat ? true : false; } //--------------------------------------------------------------------------------------------------------------------------------- // GetParentFormat DXGI_FORMAT CD3D11FormatHelper::GetParentFormat(DXGI_FORMAT Format) { return s_FormatDetail[Format].ParentFormat; } //--------------------------------------------------------------------------------------------------------------------------------- // GetFormatCastSet const DXGI_FORMAT* CD3D11FormatHelper::GetFormatCastSet(DXGI_FORMAT Format) { return s_FormatDetail[Format].pDefaultFormatCastSet; } //--------------------------------------------------------------------------------------------------------------------------------- // GetTypeLevel D3D11_FORMAT_TYPE_LEVEL CD3D11FormatHelper::GetTypeLevel(DXGI_FORMAT Format) { return s_FormatDetail[GetDetailTableIndexNoThrow( Format )].TypeLevel; } //--------------------------------------------------------------------------------------------------------------------------------- // GetComponentName D3D11_FORMAT_COMPONENT_NAME CD3D11FormatHelper::GetComponentName(DXGI_FORMAT Format, UINT AbsoluteComponentIndex) { D3D11_FORMAT_COMPONENT_NAME name; switch( AbsoluteComponentIndex ) { case 0: name = s_FormatDetail[GetDetailTableIndexNoThrow( Format )].ComponentName0; break; 
case 1: name = s_FormatDetail[GetDetailTableIndexNoThrow( Format )].ComponentName1; break; case 2: name = s_FormatDetail[GetDetailTableIndexNoThrow( Format )].ComponentName2; break; case 3: name = s_FormatDetail[GetDetailTableIndexNoThrow( Format )].ComponentName3; break; default: throw E_FAIL; } return name; } //--------------------------------------------------------------------------------------------------------------------------------- // GetBitsPerComponent UINT CD3D11FormatHelper::GetBitsPerComponent(DXGI_FORMAT Format, UINT AbsoluteComponentIndex) { if( AbsoluteComponentIndex > 3 ) { throw E_FAIL; } return s_FormatDetail[GetDetailTableIndexNoThrow( Format )].BitsPerComponent[AbsoluteComponentIndex]; } //--------------------------------------------------------------------------------------------------------------------------------- // GetFormatComponentInterpretation D3D11_FORMAT_COMPONENT_INTERPRETATION CD3D11FormatHelper::GetFormatComponentInterpretation(DXGI_FORMAT Format, UINT AbsoluteComponentIndex) { D3D11_FORMAT_COMPONENT_INTERPRETATION interp; SecureZeroMemory(&interp, sizeof(interp)); switch( AbsoluteComponentIndex ) { case 0: interp = s_FormatDetail[GetDetailTableIndexNoThrow( Format )].ComponentInterpretation0; break; case 1: interp = s_FormatDetail[GetDetailTableIndexNoThrow( Format )].ComponentInterpretation1; break; case 2: interp = s_FormatDetail[GetDetailTableIndexNoThrow( Format )].ComponentInterpretation2; break; case 3: interp = s_FormatDetail[GetDetailTableIndexNoThrow( Format )].ComponentInterpretation3; break; // default: throw E_FAIL; } return interp; } //--------------------------------------------------------------------------------------------------------------------------------- // Planar BOOL CD3D11FormatHelper::Planar(DXGI_FORMAT Format) { return s_FormatDetail[GetDetailTableIndexNoThrow( Format )].bPlanar; } 
//--------------------------------------------------------------------------------------------------------------------------------- // Non-opaque Planar BOOL CD3D11FormatHelper::NonOpaquePlanar(DXGI_FORMAT Format) { return Planar(Format) && !Opaque(Format); } //--------------------------------------------------------------------------------------------------------------------------------- // YUV BOOL CD3D11FormatHelper::YUV(DXGI_FORMAT Format) { return s_FormatDetail[GetDetailTableIndexNoThrow( Format )].bYUV; } //--------------------------------------------------------------------------------------------------------------------------------- // Format family supports stencil bool CD3D11FormatHelper::FamilySupportsStencil(DXGI_FORMAT Format) { switch( GetParentFormat(Format) ) { case DXGI_FORMAT_R32G8X24_TYPELESS: case DXGI_FORMAT_R24G8_TYPELESS: return true; } return false; } //--------------------------------------------------------------------------------------------------------------------------------- // GetYCbCrChromaSubsampling void CD3D11FormatHelper::GetYCbCrChromaSubsampling( DXGI_FORMAT Format, _Out_ UINT& HorizontalSubsampling, _Out_ UINT& VerticalSubsampling ) { switch( Format) { // YCbCr 4:2:0 case DXGI_FORMAT_NV12: case DXGI_FORMAT_P010: case DXGI_FORMAT_P016: case DXGI_FORMAT_420_OPAQUE: HorizontalSubsampling = 2; VerticalSubsampling = 2; break; // YCbCr 4:2:2 case DXGI_FORMAT_P208: case DXGI_FORMAT_YUY2: case DXGI_FORMAT_Y210: HorizontalSubsampling = 2; VerticalSubsampling = 1; break; // YCbCr 4:4:0 case DXGI_FORMAT_V208: HorizontalSubsampling = 1; VerticalSubsampling = 2; break; // YCbCr 4:4:4 case DXGI_FORMAT_AYUV: case DXGI_FORMAT_V408: case DXGI_FORMAT_Y410: case DXGI_FORMAT_Y416: // Fallthrough // YCbCr palletized 4:4:4: case DXGI_FORMAT_AI44: case DXGI_FORMAT_IA44: case DXGI_FORMAT_P8: case DXGI_FORMAT_A8P8: HorizontalSubsampling = 1; VerticalSubsampling = 1; break; // YCbCr 4:1:1 case DXGI_FORMAT_NV11: HorizontalSubsampling = 4; 
VerticalSubsampling = 1; break; default: // All YCbCr formats should be in this list. assert( !YUV(Format) ); HorizontalSubsampling = 1; VerticalSubsampling = 1; break; }; } //--------------------------------------------------------------------------------------------------------------------------------- // Plane count for non-opaque planar formats UINT CD3D11FormatHelper::NonOpaquePlaneCount(DXGI_FORMAT Format) { if (!CD3D11FormatHelper::NonOpaquePlanar(Format)) { return 1; } // V208 and V408 are the only 3-plane formats. return (Format == DXGI_FORMAT_V208 || Format == DXGI_FORMAT_V408) ? 3 : 2; } //--------------------------------------------------------------------------------------------------------------------------------- // GetTileShape // // Retrieve Tiled Resource tile shape void CD3D11FormatHelper::GetTileShape( D3D11_TILE_SHAPE* pTileShape, DXGI_FORMAT Format, D3D11_RESOURCE_DIMENSION Dimension, UINT SampleCount ) { UINT BPU = GetBitsPerUnit(Format); switch(Dimension) { case D3D11_RESOURCE_DIMENSION_BUFFER: case D3D11_RESOURCE_DIMENSION_TEXTURE1D: { pTileShape->WidthInTexels = (BPU == 0) ? D3D11_2_TILED_RESOURCE_TILE_SIZE_IN_BYTES : D3D11_2_TILED_RESOURCE_TILE_SIZE_IN_BYTES*8 / BPU; pTileShape->HeightInTexels = 1; pTileShape->DepthInTexels = 1; } break; case D3D11_RESOURCE_DIMENSION_TEXTURE2D: { if (IsBlockCompressFormat(Format)) { // Currently only supported block sizes are 64 and 128. // These equations calculate the size in texels for a tile. It relies on the fact that 64 * 64 blocks fit in a tile if the block size is 128 bits. assert(BPU == 64 || BPU == 128); pTileShape->WidthInTexels = 64 * GetWidthAlignment(Format); pTileShape->HeightInTexels = 64 * GetHeightAlignment(Format); pTileShape->DepthInTexels = 1; if (BPU == 64) { // If bits per block are 64 we double width so it takes up the full tile size. 
assert((Format >= DXGI_FORMAT_BC1_TYPELESS && Format <= DXGI_FORMAT_BC1_UNORM_SRGB) || (Format >= DXGI_FORMAT_BC4_TYPELESS && Format <= DXGI_FORMAT_BC4_SNORM)); pTileShape->WidthInTexels *= 2; } } else { // Not a block format so BPU is bits per pixel. pTileShape->DepthInTexels = 1; switch(BPU) { case 8: pTileShape->WidthInTexels = 256; pTileShape->HeightInTexels = 256; break; case 16: pTileShape->WidthInTexels = 256; pTileShape->HeightInTexels = 128; break; case 32: pTileShape->WidthInTexels = 128; pTileShape->HeightInTexels = 128; break; case 64: pTileShape->WidthInTexels = 128; pTileShape->HeightInTexels = 64; break; case 128: pTileShape->WidthInTexels = 64; pTileShape->HeightInTexels = 64; break; } switch(SampleCount) { case 1: break; case 2: pTileShape->WidthInTexels /= 2; pTileShape->HeightInTexels /= 1; break; case 4: pTileShape->WidthInTexels /= 2; pTileShape->HeightInTexels /= 2; break; case 8: pTileShape->WidthInTexels /= 4; pTileShape->HeightInTexels /= 2; break; case 16: pTileShape->WidthInTexels /= 4; pTileShape->HeightInTexels /= 4; break; default: ASSUME(false); } } break; } case D3D11_RESOURCE_DIMENSION_TEXTURE3D: { if (IsBlockCompressFormat(Format)) { // Currently only supported block sizes are 64 and 128. // These equations calculate the size in texels for a tile. It relies on the fact that 16*16*16 blocks fit in a tile if the block size is 128 bits. assert(BPU == 64 || BPU == 128); pTileShape->WidthInTexels = 16 * GetWidthAlignment(Format); pTileShape->HeightInTexels = 16 * GetHeightAlignment(Format); pTileShape->DepthInTexels = 16 * GetDepthAlignment(Format); if (BPU == 64) { // If bits per block are 64 we double width so it takes up the full tile size. 
assert((Format >= DXGI_FORMAT_BC1_TYPELESS && Format <= DXGI_FORMAT_BC1_UNORM_SRGB) || (Format >= DXGI_FORMAT_BC4_TYPELESS && Format <= DXGI_FORMAT_BC4_SNORM)); pTileShape->WidthInTexels *= 2; } } else if (Format == DXGI_FORMAT_R8G8_B8G8_UNORM || Format == DXGI_FORMAT_G8R8_G8B8_UNORM) { //RGBG and GRGB are treated as 2x1 block format pTileShape->WidthInTexels = 64; pTileShape->HeightInTexels = 32; pTileShape->DepthInTexels = 16; } else { // Not a block format so BPU is bits per pixel. assert(GetWidthAlignment(Format) == 1 && GetHeightAlignment(Format) == 1 && GetDepthAlignment(Format)); switch(BPU) { case 8: pTileShape->WidthInTexels = 64; pTileShape->HeightInTexels = 32; pTileShape->DepthInTexels = 32; break; case 16: pTileShape->WidthInTexels = 32; pTileShape->HeightInTexels = 32; pTileShape->DepthInTexels = 32; break; case 32: pTileShape->WidthInTexels = 32; pTileShape->HeightInTexels = 32; pTileShape->DepthInTexels = 16; break; case 64: pTileShape->WidthInTexels = 32; pTileShape->HeightInTexels = 16; pTileShape->DepthInTexels = 16; break; case 128: pTileShape->WidthInTexels = 16; pTileShape->HeightInTexels = 16; pTileShape->DepthInTexels = 16; break; } } break; } } } // End of file ================================================ FILE: src/ImmediateContext.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#include "pch.h"
#include "PrecompiledShaders.h"

namespace D3D12TranslationLayer
{
//==================================================================================================================================
//
//==================================================================================================================================

// Clears the constant buffer, SRV and sampler bindings tracked for one shader stage.
void ImmediateContext::SStageState::ClearState(EShaderStage stage) noexcept
{
    m_CBs.Clear(stage);
    m_SRVs.Clear(stage);
    m_Samplers.Clear();
}

// Clears every per-stage binding set, then the pipeline-wide bindings (UAVs, RTVs, DSVs, VBs, IB, SO)
// and resets the predicate and PSO pointers to null.
void ImmediateContext::SState::ClearState() noexcept
{
    for (EShaderStage stage = (EShaderStage)0; stage < ShaderStageCount; stage = (EShaderStage)(stage + 1))
    {
        GetStageState(stage).ClearState(stage);
    }

    m_UAVs.Clear(e_Graphics);
    m_CSUAVs.Clear(e_Compute);
    m_RTVs.Clear(e_Graphics);
    m_DSVs.Clear(e_Graphics);
    m_VBs.Clear(e_Graphics);
    m_IB.Clear(e_Graphics);
    m_SO.Clear(e_Graphics);
    m_pPredicate = nullptr;
    m_pPSO = nullptr;
}

// Maps a shader stage enum value to the matching per-stage state member.
// Any value other than the six listed stages hits ASSUME(false).
ImmediateContext::SStageState& ImmediateContext::SState::GetStageState(EShaderStage stage) noexcept
{
    switch(stage)
    {
    case e_PS: return m_PS;
    case e_VS: return m_VS;
    case e_GS: return m_GS;
    case e_HS: return m_HS;
    case e_DS: return m_DS;
    case e_CS: return m_CS;
    default: ASSUME(false);
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Constructs the immediate context for one node of a D3D12 device.
//
// Visible steps, in order: member initialization; validation of the creation args (throws E_INVALIDARG
// if residency management is disabled); optional lock / thread pool setup; adapter lookup via DXCore
// with a DXGI fallback; residency manager initialization; creation of the shader-visible view heap
// (and the sampler heap unless ComputeOnly()); creation of null SRV/UAV descriptors per resource
// dimension; null RTV and null sampler (unless ComputeOnly()); optional interface queries; and finally
// creation and initialization of the graphics command list manager.
//
// Declared noexcept(false): ThrowFailure and the allocations marked "throw(...)" below may throw.
ImmediateContext::ImmediateContext(UINT nodeIndex, D3D12_FEATURE_DATA_D3D12_OPTIONS& caps,
    ID3D12Device* pDevice, ID3D12CommandQueue* pQueue, TranslationLayerCallbacks const& callbacks, UINT64 debugFlags, CreationArgs args) noexcept(false)
    : m_nodeIndex(nodeIndex)
    , m_caps(caps)
    , m_FeatureLevel(GetHardwareFeatureLevel(pDevice))
    , m_pDevice12(pDevice)
    // Offline descriptor allocators; the last argument is the node mask for this node (bit nodeIndex).
    , m_SRVAllocator(pDevice, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1024, args.CreatesAndDestroysAreMultithreaded, 1 << nodeIndex)
    , m_UAVAllocator(pDevice, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1024, args.CreatesAndDestroysAreMultithreaded, 1 << nodeIndex)
    , m_RTVAllocator(pDevice, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 64, args.CreatesAndDestroysAreMultithreaded, 1 << nodeIndex)
    , m_DSVAllocator(pDevice, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, 64, args.CreatesAndDestroysAreMultithreaded, 1 << nodeIndex)
    , m_SamplerAllocator(pDevice, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, 64, args.CreatesAndDestroysAreMultithreaded, 1 << nodeIndex)
    , m_ResourceCache(*this)
    , m_DirtyStates(e_DirtyOnFirstCommandList)
    , m_StatesToReassert(e_ReassertOnNewCommandList)
    // Buffer pools: the requested trim threshold is clamped to [m_MinBufferPoolTrimThreshold, m_MaxBufferPoolTrimThreshold].
    , m_UploadBufferPool(min(max((UINT64)args.BufferPoolTrimThreshold, m_MinBufferPoolTrimThreshold), m_MaxBufferPoolTrimThreshold), args.CreatesAndDestroysAreMultithreaded)
    , m_ReadbackBufferPool(min(max((UINT64)args.BufferPoolTrimThreshold, m_MinBufferPoolTrimThreshold), m_MaxBufferPoolTrimThreshold), args.CreatesAndDestroysAreMultithreaded)
    , m_DecoderBufferPool(min(max((UINT64)args.BufferPoolTrimThreshold, m_MinBufferPoolTrimThreshold), m_MaxBufferPoolTrimThreshold), args.CreatesAndDestroysAreMultithreaded)
    , m_uStencilRef(0)
    , m_PrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_UNDEFINED)
    , m_PredicateValue(false)
    , m_IndexBufferFormat(DXGI_FORMAT_UNKNOWN)
    , m_uNumScissors(0)
    , m_uNumViewports(0)
    , m_ScissorRectEnable(false)
    , m_uIndexBufferOffset(0)
    , m_callbacks(callbacks)
    , m_GenerateMipsRootSig(this)
    , m_InternalUAVRootSig(this)
    , m_DeferredDeletionQueueManager(this)
    // Heap suballocators: each takes two argument tuples plus ResourceNeedsOwnAllocation.
    // NOTE(review): presumably the tuples construct a buddy allocator and a direct allocator — confirm against the allocator type.
    , m_UploadHeapSuballocator(
        std::forward_as_tuple(cBuddyMaxBlockSize, cBuddyAllocatorThreshold, (bool)args.CreatesAndDestroysAreMultithreaded, this, AllocatorHeapType::Upload),
        std::forward_as_tuple(this, AllocatorHeapType::Upload),
        ResourceNeedsOwnAllocation)
    , m_ReadbackHeapSuballocator(
        std::forward_as_tuple(cBuddyMaxBlockSize, cBuddyAllocatorThreshold, (bool)args.CreatesAndDestroysAreMultithreaded, this, AllocatorHeapType::Readback),
        std::forward_as_tuple(this, AllocatorHeapType::Readback),
        ResourceNeedsOwnAllocation)
    , m_DecoderHeapSuballocator(
        std::forward_as_tuple(cBuddyMaxBlockSize, cBuddyAllocatorThreshold, (bool)args.CreatesAndDestroysAreMultithreaded, this, AllocatorHeapType::Decoder),
        std::forward_as_tuple(this, AllocatorHeapType::Decoder),
        ResourceNeedsOwnAllocation)
    , m_CreationArgs(args)
    , m_ResourceStateManager(*this)
#if DBG
    , m_DebugFlags(debugFlags)
#endif
    , m_bUseRingBufferDescriptorHeaps(args.IsXbox)
    , m_BltResolveManager(*this)
    , m_residencyManager(*this)
    , m_architecture(QueryArchitectureFlags())
{
    // debugFlags is only consumed when DBG is defined (see initializer list).
    UNREFERENCED_PARAMETER(debugFlags);
    memset(m_BlendFactor, 0, sizeof(m_BlendFactor));
    memset(m_auVertexOffsets, 0, sizeof(m_auVertexOffsets));
    memset(m_auVertexStrides, 0, sizeof(m_auVertexStrides));
    memset(m_aScissors, 0, sizeof(m_aScissors));
    memset(m_aViewports, 0, sizeof(m_aViewports));

    HRESULT hr = S_OK;

    if (!m_CreationArgs.UseResidencyManagement)
    {
        // Residency management is no longer optional
        ThrowFailure(E_INVALIDARG);
    }

    // Locks and the PSO thread pool are only set up when the corresponding creation arg asks for them.
    if (m_CreationArgs.RenamingIsMultithreaded)
    {
        m_RenamesInFlight.InitLock();
    }

    if (m_CreationArgs.UseThreadpoolForPSOCreates)
    {
        m_spPSOCompilationThreadPool.reset(new CThreadPool);
    }

    if (m_CreationArgs.CreatesAndDestroysAreMultithreaded)
    {
        m_DeferredDeletionQueueManager.InitLock();
    }

    m_MaxFrameLatencyHelper.Init(this);

    D3D12TranslationLayer::InitializeListHead(&m_ActiveQueryList);

    // Best effort: the result of creating the sync-only queue is deliberately ignored.
    D3D12_COMMAND_QUEUE_DESC SyncOnlyQueueDesc = { D3D12_COMMAND_LIST_TYPE_NONE };
    (void)m_pDevice12->CreateCommandQueue(&SyncOnlyQueueDesc, IID_PPV_ARGS(&m_pSyncOnlyQueue));

    // Locate the adapter for this device: try DXCore first, fall back to DXGI below.
    LUID adapterLUID = pDevice->GetAdapterLuid();
    {
        // NOTE(review): CComPtr (and proc_address below) appear without template arguments here —
        // possibly lost in extraction; confirm against the upstream file.
        CComPtr pFactory;
#if DYNAMIC_LOAD_DXCORE
        m_DXCore.load("dxcore");
        auto pfnDXCoreCreateAdapterFactory = m_DXCore.proc_address("DXCoreCreateAdapterFactory");
        if (m_DXCore && pfnDXCoreCreateAdapterFactory && SUCCEEDED(pfnDXCoreCreateAdapterFactory(IID_PPV_ARGS(&pFactory))))
#else
        if (SUCCEEDED(DXCoreCreateAdapterFactory(IID_PPV_ARGS(&pFactory))))
#endif
        {
            (void)pFactory->GetAdapterByLuid(adapterLUID, IID_PPV_ARGS(&m_pDXCoreAdapter));
        }
    }

    // No DXCore adapter: the DXGI path is mandatory and throws on failure.
    if (!m_pDXCoreAdapter)
    {
        CComPtr pFactory;
        ThrowFailure(CreateDXGIFactory2(0, IID_PPV_ARGS(&pFactory)));
        ThrowFailure(pFactory->EnumAdapterByLuid(adapterLUID, IID_PPV_ARGS(&m_pDXGIAdapter)));
    }

    m_residencyManager.Initialize(nodeIndex, m_pDXCoreAdapter.get(), m_pDXGIAdapter.get());

    m_UAVDeclScratch.reserve(D3D11_1_UAV_SLOT_COUNT); // throw( bad_alloc )
    m_vUAVBarriers.reserve(D3D11_1_UAV_SLOT_COUNT); // throw( bad_alloc )

    // View heap sizing: cap the requested size at the tier 1 limit; a result of 0 selects the tier 1 limit.
    m_ViewHeap.m_MaxHeapSize = min((DWORD) D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1, m_CreationArgs.MaxSRVHeapSize);
    if (m_ViewHeap.m_MaxHeapSize == 0)
        m_ViewHeap.m_MaxHeapSize = D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1;
    // With ring-buffer heaps the heap starts at 4096 descriptors; otherwise it starts at its maximum size.
    const UINT32 viewHeapStartingCount = m_bUseRingBufferDescriptorHeaps ? 4096 : m_ViewHeap.m_MaxHeapSize;
    m_ViewHeap.m_DescriptorRingBuffer = CFencedRingBuffer(viewHeapStartingCount);
    m_ViewHeap.m_Desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
    m_ViewHeap.m_Desc.NumDescriptors = viewHeapStartingCount;
    m_ViewHeap.m_Desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
    m_ViewHeap.m_Desc.NodeMask = GetNodeMask();

    // Sampler heap sizing follows the same scheme with a starting count of 512 for ring-buffer heaps.
    m_SamplerHeap.m_MaxHeapSize = D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE;
    const UINT32 samplerHeapStartingCount = m_bUseRingBufferDescriptorHeaps ? 512 : m_SamplerHeap.m_MaxHeapSize;
    m_SamplerHeap.m_DescriptorRingBuffer = CFencedRingBuffer(samplerHeapStartingCount);
    m_SamplerHeap.m_Desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
    m_SamplerHeap.m_Desc.NumDescriptors = samplerHeapStartingCount;
    m_SamplerHeap.m_Desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
    m_SamplerHeap.m_Desc.NodeMask = GetNodeMask();

    //Fetch additional caps
    // The return value is not checked; m_options13 stays zero-initialized if the query writes nothing.
    m_options13 = {};
    pDevice->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS13, &m_options13, sizeof(m_options13));

    // Create initial objects
    hr = m_pDevice12->CreateDescriptorHeap(&m_ViewHeap.m_Desc, IID_PPV_ARGS(&m_ViewHeap.m_pDescriptorHeap));
    ThrowFailure(hr); //throw( _com_error )
    m_ViewHeap.m_DescriptorSize = m_pDevice12->GetDescriptorHandleIncrementSize(m_ViewHeap.m_Desc.Type);
    m_ViewHeap.m_DescriptorHeapBase = m_ViewHeap.m_pDescriptorHeap->GetGPUDescriptorHandleForHeapStart().ptr;
    m_ViewHeap.m_DescriptorHeapBaseCPU = m_ViewHeap.m_pDescriptorHeap->GetCPUDescriptorHandleForHeapStart().ptr;
    m_ViewHeap.m_BitsToSetOnNewHeap = e_ViewsDirty;

    // The shader-visible sampler heap is skipped for compute-only contexts.
    if (!ComputeOnly())
    {
        hr = m_pDevice12->CreateDescriptorHeap(&m_SamplerHeap.m_Desc, IID_PPV_ARGS(&m_SamplerHeap.m_pDescriptorHeap));
        ThrowFailure(hr); //throw( _com_error )
        m_SamplerHeap.m_DescriptorSize = m_pDevice12->GetDescriptorHandleIncrementSize(m_SamplerHeap.m_Desc.Type);
        m_SamplerHeap.m_DescriptorHeapBase = m_SamplerHeap.m_pDescriptorHeap->GetGPUDescriptorHandleForHeapStart().ptr;
        m_SamplerHeap.m_DescriptorHeapBaseCPU = m_SamplerHeap.m_pDescriptorHeap->GetCPUDescriptorHandleForHeapStart().ptr;
        m_SamplerHeap.m_BitsToSetOnNewHeap = e_SamplersDirty;
    }

    // Build one null SRV and (where a UAV dimension exists) one null UAV per resource dimension.
    // In compute-only mode every slot is created as a buffer view.
    for (UINT i = 0; i <= (UINT)RESOURCE_DIMENSION::TEXTURECUBEARRAY; ++i)
    {
        auto ResourceDimension = ComputeOnly() ? RESOURCE_DIMENSION::BUFFER : (RESOURCE_DIMENSION)i;
        D3D12_SHADER_RESOURCE_VIEW_DESC NullSRVDesc = {};
        D3D12_UNORDERED_ACCESS_VIEW_DESC NullUAVDesc = {};
        NullSRVDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
        NullSRVDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
        NullUAVDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
        switch (ResourceDimension)
        {
            case RESOURCE_DIMENSION::BUFFER:
            case RESOURCE_DIMENSION::UNKNOWN:
                NullSRVDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
                NullUAVDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
                NullSRVDesc.Buffer.FirstElement = 0;
                NullSRVDesc.Buffer.NumElements = 0;
                NullSRVDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
                NullSRVDesc.Buffer.StructureByteStride = 0;
                NullUAVDesc.Buffer.FirstElement = 0;
                NullUAVDesc.Buffer.NumElements = 0;
                NullUAVDesc.Buffer.StructureByteStride = 0;
                NullUAVDesc.Buffer.CounterOffsetInBytes = 0;
                NullUAVDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE;
                if (ComputeOnly())
                {
                    // Compute only will use a raw view instead of typed
                    NullSRVDesc.Format = DXGI_FORMAT_R32_TYPELESS;
                    NullUAVDesc.Format = DXGI_FORMAT_R32_TYPELESS;
                    NullSRVDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
                    NullUAVDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
                }
                break;
            case RESOURCE_DIMENSION::TEXTURE1D:
                NullSRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D;
                NullUAVDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D;
                NullSRVDesc.Texture1D.MipLevels = 1;
                NullSRVDesc.Texture1D.MostDetailedMip = 0;
                NullSRVDesc.Texture1D.ResourceMinLODClamp = 0.0f;
                NullUAVDesc.Texture1D.MipSlice = 0;
                break;
            case RESOURCE_DIMENSION::TEXTURE1DARRAY:
                NullSRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1DARRAY;
                NullUAVDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1DARRAY;
                NullSRVDesc.Texture1DArray.MipLevels = 1;
                NullSRVDesc.Texture1DArray.ArraySize = 1;
                NullSRVDesc.Texture1DArray.MostDetailedMip = 0;
                NullSRVDesc.Texture1DArray.FirstArraySlice = 0;
                NullSRVDesc.Texture1DArray.ResourceMinLODClamp = 0.0f;
                NullUAVDesc.Texture1DArray.ArraySize = 1;
                NullUAVDesc.Texture1DArray.MipSlice = 0;
                NullUAVDesc.Texture1DArray.FirstArraySlice = 0;
                break;
            case RESOURCE_DIMENSION::TEXTURE2D:
                NullSRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
                NullUAVDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D;
                NullSRVDesc.Texture2D.MipLevels = 1;
                NullSRVDesc.Texture2D.MostDetailedMip = 0;
                NullSRVDesc.Texture2D.PlaneSlice = 0;
                NullSRVDesc.Texture2D.ResourceMinLODClamp = 0.0f;
                NullUAVDesc.Texture2D.MipSlice = 0;
                NullUAVDesc.Texture2D.PlaneSlice = 0;
                break;
            case RESOURCE_DIMENSION::TEXTURE2DARRAY:
                NullSRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
                NullUAVDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY;
                NullSRVDesc.Texture2DArray.MipLevels = 1;
                NullSRVDesc.Texture2DArray.ArraySize = 1;
                NullSRVDesc.Texture2DArray.MostDetailedMip = 0;
                NullSRVDesc.Texture2DArray.FirstArraySlice = 0;
                NullSRVDesc.Texture2DArray.PlaneSlice = 0;
                NullSRVDesc.Texture2DArray.ResourceMinLODClamp = 0.0f;
                NullUAVDesc.Texture2DArray.ArraySize = 1;
                NullUAVDesc.Texture2DArray.MipSlice = 0;
                NullUAVDesc.Texture2DArray.FirstArraySlice = 0;
                NullUAVDesc.Texture2DArray.PlaneSlice = 0;
                break;
            // The multisample and cube dimensions below set the UAV dimension to UNKNOWN, so no null UAV is created for them.
            case RESOURCE_DIMENSION::TEXTURE2DMS:
                NullSRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS;
                NullUAVDesc.ViewDimension = D3D12_UAV_DIMENSION_UNKNOWN;
                break;
            case RESOURCE_DIMENSION::TEXTURE2DMSARRAY:
                NullSRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY;
                NullUAVDesc.ViewDimension = D3D12_UAV_DIMENSION_UNKNOWN;
                NullSRVDesc.Texture2DMSArray.ArraySize = 1;
                NullSRVDesc.Texture2DMSArray.FirstArraySlice = 0;
                break;
            case RESOURCE_DIMENSION::TEXTURE3D:
                NullSRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
                NullUAVDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D;
                NullSRVDesc.Texture3D.MipLevels = 1;
                NullSRVDesc.Texture3D.MostDetailedMip = 0;
                NullSRVDesc.Texture3D.ResourceMinLODClamp = 0.0f;
                NullUAVDesc.Texture3D.WSize = 1;
                NullUAVDesc.Texture3D.MipSlice = 0;
                NullUAVDesc.Texture3D.FirstWSlice = 0;
                break;
            case RESOURCE_DIMENSION::TEXTURECUBE:
                NullSRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE;
                NullUAVDesc.ViewDimension = D3D12_UAV_DIMENSION_UNKNOWN;
                NullSRVDesc.TextureCube.MipLevels = 1;
                NullSRVDesc.TextureCube.MostDetailedMip = 0;
                NullSRVDesc.TextureCube.ResourceMinLODClamp = 0.0f;
                break;
            case RESOURCE_DIMENSION::TEXTURECUBEARRAY:
                NullSRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBEARRAY;
                NullUAVDesc.ViewDimension = D3D12_UAV_DIMENSION_UNKNOWN;
                NullSRVDesc.TextureCubeArray.MipLevels = 1;
                NullSRVDesc.TextureCubeArray.NumCubes = 1;
                NullSRVDesc.TextureCubeArray.MostDetailedMip = 0;
                NullSRVDesc.TextureCubeArray.First2DArrayFace = 0;
                NullSRVDesc.TextureCubeArray.ResourceMinLODClamp = 0.0f;
                break;
        }

        // Only allocate a slot and create the descriptor when a view dimension was selected above.
        if (NullSRVDesc.ViewDimension != D3D12_SRV_DIMENSION_UNKNOWN)
        {
            m_NullSRVs[i] = m_SRVAllocator.AllocateHeapSlot(); // throw( _com_error )
            m_pDevice12->CreateShaderResourceView(nullptr, &NullSRVDesc, m_NullSRVs[i]);
        }

        if (NullUAVDesc.ViewDimension != D3D12_UAV_DIMENSION_UNKNOWN)
        {
            m_NullUAVs[i] = m_UAVAllocator.AllocateHeapSlot(); // throw( _com_error )
            m_pDevice12->CreateUnorderedAccessView(nullptr, nullptr, &NullUAVDesc, m_NullUAVs[i]);
        }
    }

    // Null render target view (graphics-capable contexts only).
    if (!ComputeOnly())
    {
        m_NullRTV = m_RTVAllocator.AllocateHeapSlot(); // throw( _com_error )
        D3D12_RENDER_TARGET_VIEW_DESC NullRTVDesc;
        NullRTVDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
        NullRTVDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
        NullRTVDesc.Texture2D.MipSlice = 0;
        NullRTVDesc.Texture2D.PlaneSlice = 0;
        m_pDevice12->CreateRenderTargetView(nullptr, &NullRTVDesc, m_NullRTV);
    }

    // Null sampler (graphics-capable contexts only).
    if (!ComputeOnly())
    {
        m_NullSampler = m_SamplerAllocator.AllocateHeapSlot(); // throw( _com_error )
        // Arbitrary parameters used, this sampler should never actually be used
        D3D12_SAMPLER_DESC NullSamplerDesc;
        NullSamplerDesc.Filter = D3D12_FILTER_ANISOTROPIC;
        NullSamplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
        NullSamplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
        NullSamplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
        NullSamplerDesc.MipLODBias = 0.0f;
        NullSamplerDesc.MaxAnisotropy = 0;
        NullSamplerDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;
        NullSamplerDesc.MinLOD = 0.0f;
        NullSamplerDesc.MaxLOD = 0.0f;
        memset(NullSamplerDesc.BorderColor, 0, sizeof(NullSamplerDesc.BorderColor));
        m_pDevice12->CreateSampler(&NullSamplerDesc, m_NullSampler);
    }

    // Optional interfaces: the query results are not checked, so these members may remain null.
    (void)m_pDevice12->QueryInterface(&m_pDevice12_1);
    (void)m_pDevice12->QueryInterface(&m_pDevice12_2);
    m_pDevice12->QueryInterface(&m_pCompatDevice);

    // Only the graphics command list manager is created here; the video ones are created in InitializeVideo.
    m_CommandLists[(UINT)COMMAND_LIST_TYPE::GRAPHICS].reset(new CommandListManager(this, pQueue, COMMAND_LIST_TYPE::GRAPHICS)); // throw( bad_alloc )
    m_CommandLists[(UINT)COMMAND_LIST_TYPE::GRAPHICS]->InitCommandList();
}

// For every existing command list manager: discards the recorded-but-unsubmitted commands, then waits for
// outstanding work to complete. Returns false as soon as a wait reports failure or throws
// _com_error / std::bad_alloc; returns true otherwise.
bool ImmediateContext::Shutdown() noexcept
{
    for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++)
    {
        if (m_CommandLists[i])
        {
            // The device is being destroyed so no point executing any authored work
            m_CommandLists[i]->DiscardCommandList();

            // Make sure any GPU work still in the pipe is finished
            try
            {
                if (!m_CommandLists[i]->WaitForCompletion()) // throws
                {
                    return false;
                }
            }
            catch (_com_error&)
            {
                return false;
            }
            catch (std::bad_alloc&)
            {
                return false;
            }
        }
    }
    return true;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Destructor: runs Shutdown() (its result is ignored), then force-trims the deferred deletion queues.
ImmediateContext::~ImmediateContext() noexcept
{
    Shutdown();

    //Ensure all remaining allocations are cleaned up
    TrimDeletedObjects(true);

    // All queries should be gone by this point
    assert(D3D12TranslationLayer::IsListEmpty(&m_ActiveQueryList));
}

//----------------------------------------------------------------------------------------------------------------------------------
// Creates and initializes the video decode and video process command list managers, then queries
// m_pDevice12_1 for the video device interface. Throws if that query fails.
// NOTE(review): m_pDevice12_1 comes from an unchecked QueryInterface in the constructor — confirm it cannot be null here.
void ImmediateContext::InitializeVideo(ID3D12VideoDevice **ppVideoDevice)
{
    m_CommandLists[(UINT)COMMAND_LIST_TYPE::VIDEO_DECODE].reset(new CommandListManager(this, nullptr, COMMAND_LIST_TYPE::VIDEO_DECODE)); // throw( bad_alloc )
    m_CommandLists[(UINT)COMMAND_LIST_TYPE::VIDEO_DECODE]->InitCommandList();

    m_CommandLists[(UINT)COMMAND_LIST_TYPE::VIDEO_PROCESS].reset(new CommandListManager(this, nullptr, COMMAND_LIST_TYPE::VIDEO_PROCESS)); // throw( bad_alloc )
    m_CommandLists[(UINT)COMMAND_LIST_TYPE::VIDEO_PROCESS]->InitCommandList();

    ThrowFailure(m_pDevice12_1->QueryInterface(ppVideoDevice));
}

//----------------------------------------------------------------------------------------------------------------------------------
// Queues a resource (with its residency handle and deferred waits) for deletion, unless
// RetiredD3D12Object::ReadyToDestroy reports it can be destroyed already, in which case nothing is queued.
// NOTE(review): std::unique_ptr / std::vector appear without template arguments in this signature —
// possibly lost in extraction; confirm against the upstream file.
void ImmediateContext::AddResourceToDeferredDeletionQueue(ID3D12Object* pUnderlying, std::unique_ptr &&pResidencyHandle, const UINT64 lastCommandListIDs[(UINT)COMMAND_LIST_TYPE::MAX_VALID], bool completionRequired, std::vector deferredWaits)
{
    // Note: Due to the below routines being called after deferred deletion queue destruction,
    // all callers of the generic AddObjectToQueue should ensure that the object really needs to be in the queue.
    if (!RetiredD3D12Object::ReadyToDestroy(this, completionRequired, lastCommandListIDs, deferredWaits))
    {
        m_DeferredDeletionQueueManager.GetLocked()->AddObjectToQueue(pUnderlying, std::move(pResidencyHandle), lastCommandListIDs, completionRequired, std::move(deferredWaits));
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Queues an object that was last used on a single command list type; no residency handle is attached.
void ImmediateContext::AddObjectToDeferredDeletionQueue(ID3D12Object* pUnderlying, COMMAND_LIST_TYPE commandListType, UINT64 lastCommandListID, bool completionRequired)
{
    // Note: May be called after the deferred deletion queue has been destroyed, but in all such cases,
    // the ReadyToDestroy function will return true.
    if (!RetiredD3D12Object::ReadyToDestroy(this, completionRequired, lastCommandListID, commandListType))
    {
        std::unique_ptr nullUniquePtr;
        m_DeferredDeletionQueueManager.GetLocked()->AddObjectToQueue(pUnderlying, std::move(nullUniquePtr), commandListType, lastCommandListID, completionRequired);
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Queues an object given the last-used command list ID for every command list type; no residency handle is attached.
void ImmediateContext::AddObjectToDeferredDeletionQueue(ID3D12Object* pUnderlying, const UINT64 lastCommandListIDs[(UINT)COMMAND_LIST_TYPE::MAX_VALID], bool completionRequired)
{
    // Note: May be called after the deferred deletion queue has been destroyed, but in all such cases,
    // the ReadyToDestroy function will return true.
    if (!RetiredD3D12Object::ReadyToDestroy(this, completionRequired, lastCommandListIDs))
    {
        std::unique_ptr nullUniquePtr;
        m_DeferredDeletionQueueManager.GetLocked()->AddObjectToQueue(pUnderlying, std::move(nullUniquePtr), lastCommandListIDs, completionRequired);
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Pops entries from the front of both deletion queues while the front entry is ready to destroy
// (or unconditionally when deviceBeingDestroyed is true). Processing stops at the first entry that is
// not ready. Returns true if at least one entry was removed.
bool DeferredDeletionQueueManager::TrimDeletedObjects(bool deviceBeingDestroyed)
{
    bool AnyObjectsDestroyed = false;
    while (m_DeferredObjectDeletionQueue.empty() == false &&
        (m_DeferredObjectDeletionQueue.front().ReadyToDestroy(m_pParent) || deviceBeingDestroyed))
    {
        AnyObjectsDestroyed = true;
        m_DeferredObjectDeletionQueue.pop();
    }

    while (SuballocationsReadyToBeDestroyed(deviceBeingDestroyed))
    {
        AnyObjectsDestroyed = true;
        // Suballocations are explicitly destroyed before being removed from the queue.
        m_DeferredSuballocationDeletionQueue.front().Destroy();
        m_DeferredSuballocationDeletionQueue.pop();
    }

    return AnyObjectsDestroyed;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Zeroes FenceValues, then, if the object deletion queue is non-empty, copies the front entry's
// last command list IDs into it and returns true. Returns false when the queue is empty.
bool DeferredDeletionQueueManager::GetFenceValuesForObjectDeletion(UINT64(&FenceValues)[(UINT)COMMAND_LIST_TYPE::MAX_VALID])
{
    std::fill(FenceValues, std::end(FenceValues), 0ull);
    if (!m_DeferredObjectDeletionQueue.empty())
    {
        auto& obj = m_DeferredObjectDeletionQueue.front();
        std::copy(obj.m_lastCommandListIDs, std::end(obj.m_lastCommandListIDs), FenceValues);
        return true;
    }
    return false;
}

// Same as GetFenceValuesForObjectDeletion, but for the front entry of the suballocation deletion queue.
bool DeferredDeletionQueueManager::GetFenceValuesForSuballocationDeletion(UINT64(&FenceValues)[(UINT)COMMAND_LIST_TYPE::MAX_VALID])
{
    std::fill(FenceValues, std::end(FenceValues), 0ull);
    if (!m_DeferredSuballocationDeletionQueue.empty())
    {
        auto& suballocation = m_DeferredSuballocationDeletionQueue.front();
        std::copy(suballocation.m_lastCommandListIDs, std::end(suballocation.m_lastCommandListIDs), FenceValues);
        return true;
    }
    return false;
}

// True when the suballocation queue is non-empty and its front entry is ready to destroy
// (or deviceBeingDestroyed is true).
bool DeferredDeletionQueueManager::SuballocationsReadyToBeDestroyed(bool deviceBeingDestroyed)
{
    return m_DeferredSuballocationDeletionQueue.empty() == false &&
        (m_DeferredSuballocationDeletionQueue.front().ReadyToDestroy(m_pParent) || deviceBeingDestroyed);
}

// Forwards to the deferred deletion queue manager, accessed through GetLocked().
bool ImmediateContext::TrimDeletedObjects(bool deviceBeingDestroyed)
{
    return m_DeferredDeletionQueueManager.GetLocked()->TrimDeletedObjects(deviceBeingDestroyed);
}

// Trims the upload, readback and decoder buffer pools, each against the completed fence value of
// the command list type associated with its heap type. Always returns true.
bool ImmediateContext::TrimResourcePools()
{
    m_UploadBufferPool.Trim(GetCompletedFenceValue(CommandListType(AllocatorHeapType::Upload)));
    m_ReadbackBufferPool.Trim(GetCompletedFenceValue(CommandListType(AllocatorHeapType::Readback)));
    m_DecoderBufferPool.Trim(GetCompletedFenceValue(CommandListType(AllocatorHeapType::Decoder)));

    return true;
}

// Housekeeping after a submission: invokes the optional post-submit callback, trims deferred deletions
// and resource pools, and (for ring-buffer descriptor heaps) releases ring buffer space up to the
// completed graphics fence value.
void TRANSLATION_API ImmediateContext::PostSubmitNotification()
{
    if (m_callbacks.m_pfnPostSubmit)
    {
        m_callbacks.m_pfnPostSubmit();
    }
    TrimDeletedObjects();
    TrimResourcePools();

    const UINT64 completedFence = GetCompletedFenceValue(COMMAND_LIST_TYPE::GRAPHICS);

    if (m_bUseRingBufferDescriptorHeaps)
    {
        m_ViewHeap.m_DescriptorRingBuffer.Deallocate(completedFence);
        m_SamplerHeap.m_DescriptorRingBuffer.Deallocate(completedFence);
    }
}
//---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::RollOverHeap(OnlineDescriptorHeap& Heap) noexcept(false) { auto pfnCreateNew = [this](D3D12_DESCRIPTOR_HEAP_DESC const& Desc) -> unique_comptr { unique_comptr spHeap; ThrowFailure(m_pDevice12->CreateDescriptorHeap(&Desc, IID_PPV_ARGS(&spHeap))); return std::move(spHeap); }; // If we are in the growth phase don't bother using pools if (Heap.m_Desc.NumDescriptors < Heap.m_MaxHeapSize) { // Defer delete the current heap AddObjectToDeferredDeletionQueue(Heap.m_pDescriptorHeap.get(), COMMAND_LIST_TYPE::GRAPHICS, GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS), true); // Grow Heap.m_Desc.NumDescriptors *= 2; Heap.m_Desc.NumDescriptors = min(Heap.m_Desc.NumDescriptors, Heap.m_MaxHeapSize); Heap.m_pDescriptorHeap = TryAllocateResourceWithFallback([&]() { return pfnCreateNew(Heap.m_Desc); }, ResourceAllocationContext::ImmediateContextThreadLongLived); } else { // If we reach this point they are really heavy heap users so we can fall back the roll over strategy Heap.m_HeapPool.ReturnToPool(std::move(Heap.m_pDescriptorHeap), GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS)); UINT64 CurrentFenceValue = GetCompletedFenceValue(COMMAND_LIST_TYPE::GRAPHICS); Heap.m_pDescriptorHeap = TryAllocateResourceWithFallback([&]() { return Heap.m_HeapPool.RetrieveFromPool(CurrentFenceValue, pfnCreateNew, Heap.m_Desc); // throw( _com_error ) }, ResourceAllocationContext::ImmediateContextThreadLongLived); } Heap.m_DescriptorRingBuffer = CFencedRingBuffer(Heap.m_Desc.NumDescriptors); Heap.m_DescriptorHeapBase = Heap.m_pDescriptorHeap->GetGPUDescriptorHandleForHeapStart().ptr; Heap.m_DescriptorHeapBaseCPU = Heap.m_pDescriptorHeap->GetCPUDescriptorHandleForHeapStart().ptr; ID3D12DescriptorHeap* pHeaps[2] = {m_ViewHeap.m_pDescriptorHeap.get(), m_SamplerHeap.m_pDescriptorHeap.get()}; GetGraphicsCommandList()->SetDescriptorHeaps(ComputeOnly() ? 
1 : 2, pHeaps); m_DirtyStates |= Heap.m_BitsToSetOnNewHeap; } //---------------------------------------------------------------------------------------------------------------------------------- UINT ImmediateContext::ReserveSlots(OnlineDescriptorHeap& Heap, UINT NumSlots) noexcept(false) { assert(NumSlots <= Heap.m_Desc.NumDescriptors); UINT offset = 0; HRESULT hr = S_OK; do { hr = Heap.m_DescriptorRingBuffer.Allocate(NumSlots, GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS), offset); if (FAILED(hr)) { RollOverHeap(Heap); } } while (FAILED(hr)); assert(offset < Heap.m_Desc.NumDescriptors); assert(offset + NumSlots <= Heap.m_Desc.NumDescriptors); return offset; } //---------------------------------------------------------------------------------------------------------------------------------- UINT ImmediateContext::ReserveSlotsForBindings(OnlineDescriptorHeap& Heap, UINT (ImmediateContext::*pfnCalcRequiredSlots)()) noexcept(false) { UINT NumSlots = (this->*pfnCalcRequiredSlots)(); assert(NumSlots <= Heap.m_Desc.NumDescriptors); UINT offset = 0; HRESULT hr = S_OK; do { hr = Heap.m_DescriptorRingBuffer.Allocate(NumSlots, GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS), offset); if (FAILED(hr)) { RollOverHeap(Heap); NumSlots = (this->*pfnCalcRequiredSlots)(); } } while (FAILED(hr)); assert(offset < Heap.m_Desc.NumDescriptors); assert(offset + NumSlots <= Heap.m_Desc.NumDescriptors); return offset; } //---------------------------------------------------------------------------------------------------------------------------------- RootSignature* ImmediateContext::CreateOrRetrieveRootSignature(RootSignatureDesc const& desc) noexcept(false) { auto& result = m_RootSignatures[desc]; if (!result) { result.reset(new RootSignature(this, desc)); } return result.get(); } //---------------------------------------------------------------------------------------------------------------------------------- static const D3D12_RECT g_cMaxScissorRect = { D3D12_VIEWPORT_BOUNDS_MIN, 
D3D12_VIEWPORT_BOUNDS_MIN, D3D12_VIEWPORT_BOUNDS_MAX, D3D12_VIEWPORT_BOUNDS_MAX }; static const D3D12_RECT g_cMaxScissors[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE] = { g_cMaxScissorRect, g_cMaxScissorRect, g_cMaxScissorRect, g_cMaxScissorRect, g_cMaxScissorRect, g_cMaxScissorRect, g_cMaxScissorRect, g_cMaxScissorRect, g_cMaxScissorRect, g_cMaxScissorRect, g_cMaxScissorRect, g_cMaxScissorRect, g_cMaxScissorRect, g_cMaxScissorRect, g_cMaxScissorRect, g_cMaxScissorRect, }; //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::SetScissorRectsHelper() noexcept { if (ComputeOnly()) { return; } if (!m_ScissorRectEnable) { // Set 12 scissor rects to max scissor rects to effectively disable scissor rect culling GetGraphicsCommandList()->RSSetScissorRects(D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE, g_cMaxScissors); } else { // Set 12 scissor rects to 11 scissor rects GetGraphicsCommandList()->RSSetScissorRects(m_uNumScissors, reinterpret_cast(m_aScissors)); } } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::RefreshNonHeapBindings(UINT64 DirtyBits) noexcept { if ((DirtyBits & e_NonHeapBindingsDirty) == 0) { return; } if (DirtyBits & e_IndexBufferDirty) { auto pIB = *m_CurrentState.m_IB.GetBound(); DXGI_FORMAT fmt = m_IndexBufferFormat == DXGI_FORMAT_UNKNOWN ? 
DXGI_FORMAT_R16_UINT : m_IndexBufferFormat; m_CurrentState.m_IB.ResetDirty(); D3D12_INDEX_BUFFER_VIEW IBViewDesc = {}; IBViewDesc.Format = fmt; GetBufferViewDesc(pIB, IBViewDesc, m_uIndexBufferOffset); GetGraphicsCommandList()->IASetIndexBuffer(&IBViewDesc); } if (DirtyBits & e_VertexBuffersDirty) { const UINT MaxVBs = m_CurrentState.m_VBs.NumBindings; D3D12_VERTEX_BUFFER_VIEW VBViewDescs[MaxVBs]; UINT numVBs = m_CurrentState.m_VBs.GetNumBound(); UINT numNulls = max(m_CurrentState.m_LastVBCount, numVBs) - numVBs; m_CurrentState.m_LastVBCount = numVBs; ASSUME(numVBs + numNulls <= MaxVBs); m_CurrentState.m_VBs.ResetDirty(); for (UINT i = 0; i < numVBs; ++i) { auto pBuffer = m_CurrentState.m_VBs.GetBound()[i]; UINT APIOffset = m_auVertexOffsets[i]; GetBufferViewDesc(pBuffer, VBViewDescs[i], APIOffset); VBViewDescs[i].StrideInBytes = m_auVertexStrides[i]; } ZeroMemory(&VBViewDescs[numVBs], sizeof(VBViewDescs[0]) * (numNulls)); GetGraphicsCommandList()->IASetVertexBuffers(0, numVBs + numNulls, VBViewDescs); } if (DirtyBits & e_StreamOutputDirty) { const UINT MaxSO = m_CurrentState.m_SO.NumBindings; D3D12_STREAM_OUTPUT_BUFFER_VIEW SOViewDescs[ MaxSO ]; UINT numSOBuffers = m_CurrentState.m_SO.GetNumBound(); m_CurrentState.m_SO.ResetDirty(); for (UINT i = 0; i < numSOBuffers; ++i) { auto pBuffer = m_CurrentState.m_SO.GetBound()[i]; assert(GetDynamicBufferOffset(pBuffer) == 0); // 11on12 doesn't support renaming stream-output buffers GetBufferViewDesc(pBuffer, SOViewDescs[i], 0); static_assert(0 == offsetof(SStreamOutputSuffix, BufferFilledSize), "Assumed offset to struct == offset to field"); SOViewDescs[i].BufferFilledSizeLocation = pBuffer ? 
(SOViewDescs[i].BufferLocation + pBuffer->GetOffsetToStreamOutputSuffix()) : 0; } ZeroMemory(&SOViewDescs[numSOBuffers], sizeof(SOViewDescs[0]) * (MaxSO - numSOBuffers)); GetGraphicsCommandList()->SOSetTargets(0, MaxSO, SOViewDescs); } if (DirtyBits & e_RenderTargetsDirty) { const UINT MaxRTVs = m_CurrentState.m_RTVs.NumBindings; UINT numRTVs = m_CurrentState.m_RTVs.GetNumBound(); D3D12_CPU_DESCRIPTOR_HANDLE RTVDescriptors[ MaxRTVs ]; D3D12_CPU_DESCRIPTOR_HANDLE *pDSVDescriptor = nullptr; D3D12_CPU_DESCRIPTOR_HANDLE DSVDescriptor; m_CurrentState.m_RTVs.ResetDirty(); for (UINT i = 0; i < numRTVs; ++i) { auto pRTV = m_CurrentState.m_RTVs.GetBound()[i]; RTVDescriptors[i] = m_NullRTV; if (pRTV) { RTVDescriptors[i] = pRTV->GetRefreshedDescriptorHandle(); } } m_CurrentState.m_DSVs.ResetDirty(); auto pDSV = m_CurrentState.m_DSVs.GetBound()[0]; if (pDSV) { DSVDescriptor = pDSV->GetRefreshedDescriptorHandle(); pDSVDescriptor = &DSVDescriptor; } GetGraphicsCommandList()->OMSetRenderTargets(numRTVs, RTVDescriptors, false, pDSVDescriptor); } } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::PreRender(COMMAND_LIST_TYPE type) noexcept { if (type == COMMAND_LIST_TYPE::GRAPHICS) { // D3D11 predicates do not apply to video if (m_StatesToReassert & e_PredicateDirty) { if (m_CurrentState.m_pPredicate) { m_CurrentState.m_pPredicate->UsedInCommandList(type, GetCommandListID(type)); SetPredicationInternal(m_CurrentState.m_pPredicate, m_PredicateValue); } else { SetPredicationInternal(nullptr, false); } AdditionalCommandsAdded(type); } m_StatesToReassert &= ~(e_PredicateDirty); } } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::PostRender(COMMAND_LIST_TYPE type, UINT64 ReassertBitsToAdd) { m_StatesToReassert |= ReassertBitsToAdd; AdditionalCommandsAdded(type); 
GetCommandListManager(type)->SubmitCommandListIfNeeded(); #if DBG if (m_DebugFlags & Debug_FlushOnRender && HasCommands(type)) { SubmitCommandList(type); // throws } #else UNREFERENCED_PARAMETER(type); #endif } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::PostDraw() { m_CommandLists[(UINT)COMMAND_LIST_TYPE::GRAPHICS]->DrawCommandAdded(); PostRender(COMMAND_LIST_TYPE::GRAPHICS); #if DBG if (m_DebugFlags & Debug_FlushOnDraw && HasCommands(COMMAND_LIST_TYPE::GRAPHICS)) { SubmitCommandList(COMMAND_LIST_TYPE::GRAPHICS); // throws } #endif } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::PostDispatch() { m_CommandLists[(UINT)COMMAND_LIST_TYPE::GRAPHICS]->DispatchCommandAdded(); PostRender(COMMAND_LIST_TYPE::GRAPHICS); #if DBG if (m_DebugFlags & Debug_FlushOnDispatch && HasCommands(COMMAND_LIST_TYPE::GRAPHICS)) { SubmitCommandList(COMMAND_LIST_TYPE::GRAPHICS); // throws } #endif } D3D12_BOX ImmediateContext::GetSubresourceBoxFromBox(Resource *pSrc, UINT RequestedSubresource, UINT BaseSubresource, D3D12_BOX const& SrcBox) { assert(BaseSubresource <= RequestedSubresource); D3D12_BOX box = SrcBox; // This method should not be used with empty rects, as it ensures its output rect is not empty. 
assert(box.left < box.right && box.top < box.bottom && box.front < box.back); // RequestedSubresource is a D3D12 subresource, before calling into 11 it needs to be converted back UINT ApiRequestedSubresource = RequestedSubresource; UINT ApiBaseSubresource = BaseSubresource; if (pSrc->SubresourceMultiplier() > 1) { assert(pSrc->AppDesc()->NonOpaquePlaneCount() == 1); ApiRequestedSubresource = ConvertSubresourceIndexRemovePlane(RequestedSubresource, pSrc->AppDesc()->SubresourcesPerPlane()); ApiBaseSubresource = ConvertSubresourceIndexRemovePlane(BaseSubresource, pSrc->AppDesc()->SubresourcesPerPlane()); } { auto& footprint = pSrc->GetSubresourcePlacement(RequestedSubresource).Footprint; // Planar textures do not support mipmaps, and in this case coordinates should // not be divided by width / height alignment. if (pSrc->AppDesc()->NonOpaquePlaneCount() > 1) { UINT PlaneIndex = GetPlaneIdxFromSubresourceIdx(ApiRequestedSubresource, pSrc->AppDesc()->SubresourcesPerPlane()); UINT BasePlaneIndex = GetPlaneIdxFromSubresourceIdx(ApiBaseSubresource, pSrc->AppDesc()->SubresourcesPerPlane()); if (PlaneIndex > 0 && BasePlaneIndex == 0) { // Adjust for subsampling. UINT subsampleX, subsampleY; CD3D11FormatHelper::GetYCbCrChromaSubsampling(pSrc->AppDesc()->Format(), subsampleX, subsampleY); // Round up on the right bounds to prevent empty rects. box.right = min(footprint.Width, (box.right + (subsampleX - 1)) / subsampleX); box.left = min(box.right, box.left / subsampleX); box.bottom = min(footprint.Height, (box.bottom + (subsampleY - 1)) / subsampleY); box.top = min(box.bottom, box.top / subsampleY); } else { // Make sure the box is at least contained within the subresource. 
box.right = min(footprint.Width, box.right); box.left = min(box.right, box.left); box.bottom = min(footprint.Height, box.bottom); box.top = min(box.bottom, box.top); } } else { // Get the mip level of the subresource const UINT mipLevel = DecomposeSubresourceIdxExtendedGetMip(ApiRequestedSubresource, pSrc->AppDesc()->MipLevels()); const UINT baseMipLevel = DecomposeSubresourceIdxExtendedGetMip(ApiBaseSubresource, pSrc->AppDesc()->MipLevels()); const UINT mipTransform = mipLevel - baseMipLevel; static_assert(D3D12_REQ_MIP_LEVELS < 32, "Bitshifting by number of mips should be fine for a UINT."); const UINT WidthAlignment = CD3D11FormatHelper::GetWidthAlignment(pSrc->AppDesc()->Format()); const UINT HeightAlignment = CD3D11FormatHelper::GetHeightAlignment(pSrc->AppDesc()->Format()); const UINT DepthAlignment = CD3D11FormatHelper::GetDepthAlignment(pSrc->AppDesc()->Format()); // AlignAtLeast is chosen for right bounds to prevent bitshifting from resulting in empty rects. box.right = min(footprint.Width, AlignAtLeast(box.right >> mipTransform, WidthAlignment)); box.left = min(box.right, Align( box.left >> mipTransform, WidthAlignment)); box.bottom = min(footprint.Height, AlignAtLeast(box.bottom >> mipTransform, HeightAlignment)); box.top = min(box.bottom, Align( box.top >> mipTransform, HeightAlignment)); box.back = min(footprint.Depth, AlignAtLeast(box.back >> mipTransform, DepthAlignment)); box.front = min(box.back, Align( box.front >> mipTransform, DepthAlignment)); } } // This method should not generate empty rects. 
assert(box.left < box.right && box.top < box.bottom && box.front < box.back); return box; } D3D12_BOX ImmediateContext::GetBoxFromResource(Resource *pSrc, UINT SrcSubresource) { return GetSubresourceBoxFromBox(pSrc, SrcSubresource, 0, CD3DX12_BOX(0, 0, 0, pSrc->AppDesc()->Width(), pSrc->AppDesc()->Height(), pSrc->AppDesc()->Depth())); } // Handles copies that are either: // * A copy to/from the same subresource but at different offsets // * A copy to/from suballocated resources that are both from the same underlying heap void ImmediateContext::SameResourceCopy(Resource *pDst, UINT DstSubresource, Resource *pSrc, UINT SrcSubresource, UINT dstX, UINT dstY, UINT dstZ, const D3D12_BOX *pSrcBox) { #ifdef USE_PIX PIXScopedEvent(GetGraphicsCommandList(), 0ull, L"Same Resource Copy"); #endif D3D12_BOX PatchedBox = {}; if (!pSrcBox) { PatchedBox = GetBoxFromResource(pSrc, SrcSubresource); pSrcBox = &PatchedBox; } const bool bIsBoxEmpty = (pSrcBox->left >= pSrcBox->right || pSrcBox->top >= pSrcBox->bottom || pSrcBox->front >= pSrcBox->back); if (bIsBoxEmpty) { return; } // TODO: Profile the best strategy for handling same resource copies based on perf from games running on 9on12/11on12. // The default strategy is keep a per-context buffer that we re-use whenever we need to handle copies that require an intermediate // buffer. The trade-off is that the GPU needs to swizzle-deswizzle when copying in and out of the resource. The alternative strategy is // to instead allocate a resource everytime a same-resource copy is done but the intermediate resource will match the src/dst // resource, avoiding any need to swizzle/deswizzle. 
// Task captured in VSO #7121286 ResourceCreationArgs StagingResourceCreateArgs = {}; D3D12_RESOURCE_DESC &StagingDesc = StagingResourceCreateArgs.m_desc12; StagingDesc = pSrc->Parent()->m_desc12; bool bUseBufferCopy = StagingDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER; // Use pSrcBox to determine the minimal size we need to allocate for the staging resource UINT StagingHeight = StagingDesc.Height = pSrcBox->bottom - pSrcBox->top; StagingDesc.Width = pSrcBox->right - pSrcBox->left; UINT StagingWidth = static_cast(StagingDesc.Width); UINT StagingDepth = StagingDesc.DepthOrArraySize = static_cast(pSrcBox->back - pSrcBox->front); // Query the footprint, this is necessary for resources that have varying formats per subresource (planar formats such as NV12) DXGI_FORMAT StagingFormat = StagingDesc.Format = bUseBufferCopy ? DXGI_FORMAT_UNKNOWN : pSrc->GetSubresourcePlacement(SrcSubresource).Footprint.Format; StagingDesc.MipLevels = 1; StagingDesc.Flags = (D3D12_RESOURCE_FLAGS)0; StagingResourceCreateArgs.m_appDesc = AppResourceDesc(0, 1, 1, 1, 1, StagingDepth, StagingWidth, StagingHeight, StagingFormat, StagingDesc.SampleDesc.Count, StagingDesc.SampleDesc.Quality, RESOURCE_USAGE_DEFAULT, (RESOURCE_CPU_ACCESS)0, (RESOURCE_BIND_FLAGS)0, StagingDesc.Dimension); UINT64 resourceSize = 0; m_pDevice12->GetCopyableFootprints(&StagingDesc, 0, 1, 0, nullptr, nullptr, nullptr, &resourceSize); bool bReallocateStagingBuffer = false; auto &pStagingResource = bUseBufferCopy ? 
m_pStagingBuffer : m_pStagingTexture; StagingDesc = CD3DX12_RESOURCE_DESC::Buffer(resourceSize); StagingResourceCreateArgs.m_isPlacedTexture = !bUseBufferCopy; if (!pStagingResource || pStagingResource->Parent()->m_heapDesc.SizeInBytes < resourceSize) { bReallocateStagingBuffer = true; } if (bReallocateStagingBuffer) { StagingResourceCreateArgs.m_heapDesc = CD3DX12_HEAP_DESC(resourceSize, GetHeapProperties(D3D12_HEAP_TYPE_DEFAULT)); pStagingResource = Resource::CreateResource(this, StagingResourceCreateArgs, ResourceAllocationContext::ImmediateContextThreadLongLived); } else { pStagingResource->UpdateAppDesc(StagingResourceCreateArgs.m_appDesc); } assert(pStagingResource); const D3D12_BOX StagingSrcBox = CD3DX12_BOX(0, 0, 0, pSrcBox->right - pSrcBox->left, pSrcBox->bottom - pSrcBox->top, pSrcBox->back - pSrcBox->front); // Pick just one of the resources to call transitions on (don't need to transition both the src and dst since the underlying resource is the same), // we pick pDst since PostCopy will revert it back to COPY_SOURCE if it's an upload heap const UINT TransitionSubresource = DstSubresource; Resource *pTransitionResource = pDst; m_ResourceStateManager.TransitionResource(pStagingResource.get(), D3D12_RESOURCE_STATE_COPY_DEST); m_ResourceStateManager.TransitionSubresource(pTransitionResource, TransitionSubresource, D3D12_RESOURCE_STATE_COPY_SOURCE); m_ResourceStateManager.ApplyAllResourceTransitions(); CopyAndConvertSubresourceRegion(pStagingResource.get(), 0, pSrc, SrcSubresource, 0, 0, 0, reinterpret_cast(pSrcBox)); m_ResourceStateManager.TransitionResource(pStagingResource.get(), D3D12_RESOURCE_STATE_GENERIC_READ); m_ResourceStateManager.TransitionSubresource(pTransitionResource, TransitionSubresource, D3D12_RESOURCE_STATE_COPY_DEST); m_ResourceStateManager.ApplyAllResourceTransitions(); CopyAndConvertSubresourceRegion(pDst, DstSubresource, pStagingResource.get(), 0, dstX, dstY, dstZ, reinterpret_cast(&StagingSrcBox)); } 
//---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::PostCopy(Resource *pSrc, UINT srcSubresource, Resource *pDst, UINT dstSubresource, UINT totalNumSubresources) { bool bUsesSameUnderlyingSubresource = Resource::IsSameUnderlyingSubresource(pSrc, srcSubresource, pDst, dstSubresource); #if DBG for (UINT i = 1; i < totalNumSubresources; i++) { assert(bUsesSameUnderlyingSubresource == Resource::IsSameUnderlyingSubresource(pSrc, srcSubresource + i, pDst, dstSubresource + i)); } #else UNREFERENCED_PARAMETER(totalNumSubresources); #endif PostRender(COMMAND_LIST_TYPE::GRAPHICS); // Revert suballocated resource's owning heap back to the default state bool bResourceTransitioned = false; if (pSrc && !pSrc->GetIdentity()->m_bOwnsUnderlyingResource && pSrc->GetAllocatorHeapType() == AllocatorHeapType::Readback && !bUsesSameUnderlyingSubresource) // Will automatically be transitioned back to COPY_DEST if this is part of a same resource copy { m_ResourceStateManager.TransitionResource(pSrc, GetDefaultPoolState(pSrc->GetAllocatorHeapType())); bResourceTransitioned = true; } if (pDst && !pDst->GetIdentity()->m_bOwnsUnderlyingResource && pDst->GetAllocatorHeapType() == AllocatorHeapType::Upload) { m_ResourceStateManager.TransitionResource(pDst, GetDefaultPoolState(pDst->GetAllocatorHeapType())); bResourceTransitioned = true; } if (bResourceTransitioned) { m_ResourceStateManager.ApplyAllResourceTransitions(); } #if DBG if (m_DebugFlags & Debug_FlushOnCopy && HasCommands(COMMAND_LIST_TYPE::GRAPHICS)) { SubmitCommandList(COMMAND_LIST_TYPE::GRAPHICS); // throws } #endif } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::PostUpload() { PostRender(COMMAND_LIST_TYPE::GRAPHICS); #if DBG if (m_DebugFlags & Debug_FlushOnDataUpload && HasCommands(COMMAND_LIST_TYPE::GRAPHICS)) { 
SubmitCommandList(COMMAND_LIST_TYPE::GRAPHICS); // throws } #endif } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::ConstantBufferBound(Resource* pBuffer, UINT slot, EShaderStage stage) noexcept { if (pBuffer == nullptr) return; pBuffer->m_currentBindings.ConstantBufferBound(stage, slot); pBuffer->m_pParent->m_ResourceStateManager.TransitionResourceForBindings(pBuffer); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::ConstantBufferUnbound(Resource* pBuffer, UINT slot, EShaderStage stage) noexcept { if (pBuffer == nullptr) return; pBuffer->m_currentBindings.ConstantBufferUnbound(stage, slot); pBuffer->m_pParent->m_ResourceStateManager.TransitionResourceForBindings(pBuffer); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::VertexBufferBound(Resource* pBuffer, UINT slot) noexcept { if (pBuffer == nullptr) return; pBuffer->m_currentBindings.VertexBufferBound(slot); pBuffer->m_pParent->m_ResourceStateManager.TransitionResourceForBindings(pBuffer); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::VertexBufferUnbound(Resource* pBuffer, UINT slot) noexcept { if (pBuffer == nullptr) return; pBuffer->m_currentBindings.VertexBufferUnbound(slot); pBuffer->m_pParent->m_ResourceStateManager.TransitionResourceForBindings(pBuffer); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::IndexBufferBound(Resource* pBuffer) noexcept { if (pBuffer == nullptr) return; pBuffer->m_currentBindings.IndexBufferBound(); 
pBuffer->m_pParent->m_ResourceStateManager.TransitionResourceForBindings(pBuffer); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::IndexBufferUnbound(Resource* pBuffer) noexcept { if (pBuffer == nullptr) return; pBuffer->m_currentBindings.IndexBufferUnbound(); pBuffer->m_pParent->m_ResourceStateManager.TransitionResourceForBindings(pBuffer); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::StreamOutputBufferBound(Resource* pBuffer, UINT slot) noexcept { if (pBuffer == nullptr) return; pBuffer->m_currentBindings.StreamOutputBufferBound(slot); pBuffer->m_pParent->m_ResourceStateManager.TransitionResourceForBindings(pBuffer); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::StreamOutputBufferUnbound(Resource* pBuffer, UINT slot) noexcept { if (pBuffer == nullptr) return; pBuffer->m_currentBindings.StreamOutputBufferUnbound(slot); pBuffer->m_pParent->m_ResourceStateManager.TransitionResourceForBindings(pBuffer); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::AddObjectToResidencySet(Resource *pResource, COMMAND_LIST_TYPE commandListType) { m_CommandLists[(UINT)commandListType]->AddResourceToResidencySet(pResource); } //---------------------------------------------------------------------------------------------------------------------------------- bool TRANSLATION_API ImmediateContext::Flush(UINT commandListTypeMask) { #ifdef USE_PIX PIXSetMarker(0ull, L"Flush"); #endif bool bSubmitCommandList = false; m_ResourceStateManager.ApplyAllResourceTransitions(); for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++) { if 
((commandListTypeMask & (1 << i)) && m_CommandLists[i] && m_CommandLists[i]->HasCommands()) { m_CommandLists[i]->SubmitCommandList(); bSubmitCommandList = true; } } // Even if there are no commands, the app could have still done things like delete resources, // these are expected to be cleaned up on a per-flush basis PostSubmitNotification(); return bSubmitCommandList; } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::PrepForCommandQueueSync(UINT commandListTypeMask) { Flush(commandListTypeMask); for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++) { if ((commandListTypeMask & (1 << i)) && m_CommandLists[i]) { assert(!m_CommandLists[i]->HasCommands()); m_CommandLists[i]->PrepForCommandQueueSync(); } } } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::IaSetTopology(D3D12_PRIMITIVE_TOPOLOGY topology ) { m_PrimitiveTopology = topology; GetGraphicsCommandList()->IASetPrimitiveTopology(m_PrimitiveTopology); m_StatesToReassert &= ~(e_PrimitiveTopologyDirty); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::IaSetVertexBuffers(UINT StartSlot, __in_range(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT) UINT NumBuffers, Resource* const* pVBs, const UINT*pStrides, const UINT* pOffsets) { // TODO: Partial bindings for (UINT i = 0; i < NumBuffers; ++i) { UINT slot = i + StartSlot; Resource* pVB = pVBs[i]; m_CurrentState.m_VBs.UpdateBinding(slot, pVB, e_Graphics); m_auVertexOffsets[slot] = pOffsets[i]; m_auVertexStrides[slot] = pStrides[i]; } // TODO: Track offsets to conditionally set this dirty bit m_DirtyStates |= e_VertexBuffersDirty; } 
//---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::IaSetIndexBuffer(Resource* pIB, DXGI_FORMAT fmt, UINT offset) { m_CurrentState.m_IB.UpdateBinding(0, pIB, e_Graphics); m_IndexBufferFormat = fmt; m_uIndexBufferOffset = offset; // TODO: Track changes to format/offset to conditionally set this dirty bit m_DirtyStates |= e_IndexBufferDirty; } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::SoSetTargets(_In_range_(0, 4) UINT NumTargets, _In_range_(0, 4) UINT ClearSlots, _In_reads_(NumTargets) Resource* const* pBuffers, _In_reads_(NumTargets) const UINT* offsets ) { // TODO: Partial bindings bool bDirty = false; for (UINT i = 0; i < NumTargets; ++i) { UINT slot = i; Resource* pSOBuffer = pBuffers[i]; if ((offsets[i] != UINT_MAX) && pSOBuffer) { // Copy the new offset into the hidden BufferFilledSize assert(0 == offsetof(SStreamOutputSuffix, BufferFilledSize)); UINT OffsetToBufferFilledSize = pSOBuffer->GetOffsetToStreamOutputSuffix(); D3D12_BOX DstBox = { OffsetToBufferFilledSize, 0, 0, OffsetToBufferFilledSize + sizeof(UINT), 1, 1 }; D3D11_SUBRESOURCE_DATA Data = {&offsets[i]}; UpdateSubresources(pSOBuffer, CSubresourceSubset(CBufferView()), &Data, &DstBox, UpdateSubresourcesFlags::ScenarioImmediateContextInternalOp); } bDirty |= m_CurrentState.m_SO.UpdateBinding(slot, pSOBuffer, e_Graphics); } for (UINT i = NumTargets; i < NumTargets + ClearSlots; ++i) { bDirty |= m_CurrentState.m_SO.UpdateBinding(i, nullptr, e_Graphics); } m_DirtyStates |= bDirty ? 
e_StreamOutputDirty : 0; } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::OMSetRenderTargets(__in_ecount(NumRTVs) RTV* const* ppRTVs, __in_range(0, 8) UINT NumRTVs, __in_opt DSV *pDSV) { bool bDirtyRTVs = false; for (UINT i = 0; i < D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) { auto pRTV = i < NumRTVs ? ppRTVs[i] : nullptr; if (m_CurrentState.m_RTVs.UpdateBinding(i, pRTV, e_Graphics)) { bDirtyRTVs = true; } } if (m_CurrentState.m_DSVs.UpdateBinding(0, pDSV, e_Graphics)) { bDirtyRTVs = true; // RTVs and DSV are updated together } m_DirtyStates |= bDirtyRTVs ? e_RenderTargetsDirty : 0; } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::OMSetUnorderedAccessViews(UINT Start, __in_range(0, D3D11_1_UAV_SLOT_COUNT) UINT NumViews, __in_ecount(NumViews) UAV* const* ppUAVs, __in_ecount(NumViews) CONST UINT* pInitialCounts ) { for (UINT i = 0; i < NumViews; ++i) { UINT slot = i + Start; UAV* pUAV = ppUAVs[i]; // Ensure a counter resource is allocated for the UAV if necessary if(pUAV) { pUAV->EnsureCounterResource(); // throw( _com_error ) } if ((pInitialCounts[i] != UINT_MAX) && pUAV) { pUAV->UpdateCounterValue(pInitialCounts[i]); } m_CurrentState.m_UAVs.UpdateBinding(slot, pUAV, e_Graphics); } } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::CsSetUnorderedAccessViews(UINT Start, __in_range(0, D3D11_1_UAV_SLOT_COUNT) UINT NumViews, __in_ecount(NumViews) UAV* const* ppUAVs, __in_ecount(NumViews) CONST UINT* pInitialCounts) { for (UINT i = 0; i < NumViews; ++i) { UINT slot = i + Start; UAV* pUAV = ppUAVs[i]; // Ensure a counter resource is allocated for the UAV if necessary if (pUAV) { 
pUAV->EnsureCounterResource(); // throw( _com_error ) } if ((pInitialCounts[i] != UINT_MAX) && pUAV) { pUAV->UpdateCounterValue(pInitialCounts[i]); } m_CurrentState.m_CSUAVs.UpdateBinding(slot, pUAV, e_Compute); } } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::OMSetStencilRef(UINT StencilRef ) { m_uStencilRef = StencilRef; GetGraphicsCommandList()->OMSetStencilRef(StencilRef); m_StatesToReassert &= ~(e_StencilRefDirty); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::OMSetBlendFactor(const FLOAT BlendFactor[4]) { memcpy(m_BlendFactor, BlendFactor, sizeof(m_BlendFactor)); GetGraphicsCommandList()->OMSetBlendFactor(BlendFactor); m_StatesToReassert &= ~(e_BlendFactorDirty); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::SetViewport(UINT slot, const D3D12_VIEWPORT* pViewport) { m_aViewports[slot] = *pViewport; m_StatesToReassert |= e_ViewportsDirty; } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::SetNumViewports(UINT num) { m_uNumViewports = num; m_StatesToReassert |= e_ViewportsDirty; } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::SetScissorRect(UINT slot, const D3D12_RECT* pRect ) { m_aScissors[slot] = *pRect; m_StatesToReassert |= e_ScissorRectsDirty; } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::SetNumScissorRects(UINT 
num) { m_uNumScissors = num; m_StatesToReassert |= e_ScissorRectsDirty; } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::SetScissorRectEnable(BOOL ScissorRectEnable) { if (m_ScissorRectEnable != ScissorRectEnable) { m_ScissorRectEnable = ScissorRectEnable; m_StatesToReassert |= e_ScissorRectsDirty; } } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::ClearRenderTargetView(RTV *pRTV, CONST FLOAT color[4], UINT NumRects, const D3D12_RECT *pRects) { PreRender(COMMAND_LIST_TYPE::GRAPHICS); TransitionResourceForView(pRTV, D3D12_RESOURCE_STATE_RENDER_TARGET); m_ResourceStateManager.ApplyAllResourceTransitions(); auto Descriptor = pRTV->GetRefreshedDescriptorHandle(); GetGraphicsCommandList()->ClearRenderTargetView(Descriptor, color, NumRects, pRects); PostRender(COMMAND_LIST_TYPE::GRAPHICS); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::ClearDepthStencilView(DSV *pDSV, UINT Flags, FLOAT Depth, UINT8 Stencil, UINT NumRects, const D3D12_RECT *pRects) { if (!Flags) { return; } PreRender(COMMAND_LIST_TYPE::GRAPHICS); if (Flags == 0) { // The runtime guarantees that clear flags won't cause clears on read-only planes of the view, // but doesn't drop the call if no clears are happening return; } { D3D12_DEPTH_STENCIL_VIEW_DESC DSVDesc = pDSV->GetDesc12(); DSVDesc.Flags = (D3D12_DSV_FLAGS)((((Flags & D3D11_CLEAR_DEPTH) == 0) ? D3D12_DSV_FLAG_READ_ONLY_DEPTH : 0) | (((Flags & D3D11_CLEAR_STENCIL) == 0) ? 
D3D12_DSV_FLAG_READ_ONLY_STENCIL : 0)); CViewSubresourceSubset ViewSubresources(DSVDesc, pDSV->m_pResource->AppDesc()->MipLevels(), pDSV->m_pResource->AppDesc()->ArraySize(), pDSV->m_pResource->SubresourceMultiplier(), CViewSubresourceSubset::WriteOnly); assert(!ViewSubresources.IsEmpty()); m_ResourceStateManager.TransitionSubresources(pDSV->m_pResource, ViewSubresources, D3D12_RESOURCE_STATE_DEPTH_WRITE); } m_ResourceStateManager.ApplyAllResourceTransitions(); static_assert(D3D11_CLEAR_DEPTH == static_cast(D3D12_CLEAR_FLAG_DEPTH), "Casting flags"); static_assert(D3D11_CLEAR_STENCIL == static_cast(D3D12_CLEAR_FLAG_STENCIL), "Casting flags"); auto Descriptor = pDSV->GetRefreshedDescriptorHandle(); GetGraphicsCommandList()->ClearDepthStencilView(Descriptor, static_cast(Flags), Depth, Stencil, NumRects, pRects); PostRender(COMMAND_LIST_TYPE::GRAPHICS); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::ClearUnorderedAccessViewUint(UAV *pUAV, CONST UINT color[4], UINT NumRects, const D3D12_RECT *pRects) { PreRender(COMMAND_LIST_TYPE::GRAPHICS); pUAV->UsedInCommandList(COMMAND_LIST_TYPE::GRAPHICS, GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS)); TransitionResourceForView(pUAV, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); m_ResourceStateManager.ApplyAllResourceTransitions(); auto Descriptor = pUAV->GetRefreshedDescriptorHandle(); UINT ViewHeapSlot = ReserveSlots(m_ViewHeap, 1); // throw( _com_error ) D3D12_GPU_DESCRIPTOR_HANDLE GPUDescriptor = m_ViewHeap.GPUHandle(ViewHeapSlot); D3D12_CPU_DESCRIPTOR_HANDLE CPUDescriptor = m_ViewHeap.CPUHandle(ViewHeapSlot); m_pDevice12->CopyDescriptorsSimple( 1, CPUDescriptor, Descriptor, m_ViewHeap.m_Desc.Type ); GetGraphicsCommandList()->ClearUnorderedAccessViewUint(GPUDescriptor, Descriptor, pUAV->m_pResource->GetUnderlyingResource(), color, NumRects, pRects); PostRender(COMMAND_LIST_TYPE::GRAPHICS); } 
//---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::ClearUnorderedAccessViewFloat(UAV *pUAV, CONST FLOAT color[4], UINT NumRects, const D3D12_RECT *pRects) { PreRender(COMMAND_LIST_TYPE::GRAPHICS); pUAV->UsedInCommandList(COMMAND_LIST_TYPE::GRAPHICS, GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS)); TransitionResourceForView(pUAV, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); m_ResourceStateManager.ApplyAllResourceTransitions(); auto Descriptor = pUAV->GetRefreshedDescriptorHandle(); UINT ViewHeapSlot = ReserveSlots(m_ViewHeap, 1); // throw( _com_error ) D3D12_GPU_DESCRIPTOR_HANDLE GPUDescriptor = m_ViewHeap.GPUHandle(ViewHeapSlot); D3D12_CPU_DESCRIPTOR_HANDLE CPUDescriptor = m_ViewHeap.CPUHandle(ViewHeapSlot); m_pDevice12->CopyDescriptorsSimple( 1, CPUDescriptor, Descriptor, m_ViewHeap.m_Desc.Type ); GetGraphicsCommandList()->ClearUnorderedAccessViewFloat(GPUDescriptor, Descriptor, pUAV->m_pResource->GetUnderlyingResource(), color, NumRects, pRects); PostRender(COMMAND_LIST_TYPE::GRAPHICS); } template T FloatTo(float x, T max = std::numeric_limits::max()) { return x != x ? (T)0 : (max > x ? (T)x : max); } //---------------------------------------------------------------------------------------------------------------------------------- // If a resource doesn't have a render target, we have to create a temp resource of identical format, clear the temp resource, and then // copy it's contents into this resource. The ResourceCache caches resources to ensure that we pool these temp resources and make future // clears fast. 
// Clears one subresource of a resource that has no render target view of its own.
//   pResource       - resource to clear
//   color           - clear color, 4 floats, expressed relative to the plane of BaseSubresource
//   NumRects/pRects - optional rects, in BaseSubresource coordinates; NumRects == 0 clears the whole subresource
//   Subresource     - the subresource being cleared by this call
//   BaseSubresource - the subresource that 'color' and 'pRects' are expressed against
//   clearFormat     - fully typed format to clear with (asserted below)
// Two strategies are used:
//   * clearFormat cannot be a render target: a single packed texel is built on the CPU and handed to
//     UpdateSubresources once per rect.
//   * clearFormat can be a render target: a resource from the resource cache is cleared via its RTV and
//     then copied into the destination in tiles.
// NOTE(review): several casts / template uses in this block appear without their angle-bracket
// arguments (e.g. `static_cast(...)`, `FloatTo(...)`, `reinterpret_cast(...)`) - confirm against the
// canonical file.
void TRANSLATION_API ImmediateContext::ClearResourceWithNoRenderTarget(Resource* pResource, CONST FLOAT color[4], UINT NumRects, const D3D12_RECT *pRects, UINT Subresource, UINT BaseSubresource, DXGI_FORMAT clearFormat)
{
#ifdef USE_PIX
    PIXScopedEvent(GetGraphicsCommandList(), 0ull, L"Clearing resource via copy");
#endif
    assert(CD3D11FormatHelper::GetTypeLevel(clearFormat) == D3D11FTL_FULL_TYPE);

    auto& Footprint = pResource->GetSubresourcePlacement(Subresource).Footprint;

    // The color we receive is intended for a specific plane. If we're clearing a different plane, we need to adjust the color.
    // ClearRenderTargetView always expects 4 colors, and we can shift at most two channels, so prep 6 colors. The values of the last two don't matter.
    // For most planar surfaces, we'll shift once so R goes to R for the Y plane, and GB goes to RG for the UV plane.
    // For 3-plane surfaces, we'll send one color to each plane.
    float adjustedColor[6] = {color[0], color[1], color[2], color[3], 0.0f, 0.0f};
    UINT BasePlane = GetPlaneIdxFromSubresourceIdx(BaseSubresource, pResource->AppDesc()->SubresourcesPerPlane());
    UINT TargetPlane = GetPlaneIdxFromSubresourceIdx(Subresource, pResource->AppDesc()->SubresourcesPerPlane());
    assert(BasePlane <= TargetPlane);
    // From here on 'color' points into the local array, offset by the plane distance.
    color = adjustedColor + (TargetPlane - BasePlane);

    // Default: one rect covering the whole target subresource.
    RECT resourceRect = {};
    resourceRect.right = Footprint.Width;
    resourceRect.bottom = Footprint.Height;
    const RECT *pCopyRects = &resourceRect;
    UINT numCopyRects = 1;
    if (NumRects)
    {
        // Caller supplied rects: filter out empty / fully out-of-bounds ones and map the rest
        // from BaseSubresource space into Subresource space. m_RectCache is reused as scratch storage.
        auto& BaseFootprint = pResource->GetSubresourcePlacement(BaseSubresource).Footprint;
        m_RectCache.clear();
        m_RectCache.reserve(NumRects);
        for (UINT i = 0; i < NumRects; ++i)
        {
            if (pRects[i].right <= pRects[i].left ||
                pRects[i].bottom <= pRects[i].top ||
                pRects[i].right <= 0 ||
                pRects[i].bottom <= 0 ||
                pRects[i].left >= static_cast(BaseFootprint.Width) ||
                pRects[i].top >= static_cast(BaseFootprint.Height))
            {
                // Drop empty rects, because GetSubresourceBoxFromBox wants to ensure that decreasing size due to
                // mip levels doesn't return zeroes, but empty rects will have zeroes.
                continue;
            }
            // Negative coordinates are clamped to 0 before conversion.
            D3D12_BOX Box = GetSubresourceBoxFromBox(pResource, Subresource, BaseSubresource, CD3DX12_BOX(max(0l, pRects[i].left), max(0l, pRects[i].top), max(0l, pRects[i].right), max(0l, pRects[i].bottom)));
            m_RectCache.push_back( CD3DX12_RECT(Box.left, Box.top, Box.right, Box.bottom) );
        }
        pCopyRects = m_RectCache.data();
        numCopyRects = static_cast(m_RectCache.size());
    }
    if (numCopyRects == 0)
    {
        // All rects were empty, no-op this call.
        return;
    }

    if (!SupportsRenderTarget(clearFormat))
    {
        assert(!CD3D11FormatHelper::Planar(Footprint.Format)); // Each plane should have a different non-planar format.
        // Scratch storage for one packed clear texel; only the leading bytes used by the format are written.
        BYTE ClearColor[16] = {};

        // Note: Staging layout for all YUV formats is RGBA, even though the channels
        // mapped through views are potentially out-of-order.
        // Comments on channel mappings taken straight from MSDN.
        switch (Footprint.Format)
        {
        case DXGI_FORMAT_YUY2: // 8bit 4:2:2
        {
            assert(CD3D11FormatHelper::GetByteAlignment(Footprint.Format) == 4);
            ClearColor[0] = FloatTo(color[0]); // Y0 -> R8
            ClearColor[1] = FloatTo(color[1]); // U0 -> G8
            ClearColor[2] = FloatTo(color[0]); // Y1 -> B8
            ClearColor[3] = FloatTo(color[2]); // V0 -> A8
            break;
        }
        case DXGI_FORMAT_Y210: // both represented as 16bit 4:2:2
        case DXGI_FORMAT_Y216:
        {
            assert(CD3D11FormatHelper::GetByteAlignment(Footprint.Format) == 8);
            // Y210 values saturate at 10 bits, Y216 at 16 bits.
            const USHORT maxValue = Footprint.Format == DXGI_FORMAT_Y210 ? (USHORT)((1 << 10) - 1) : (USHORT)((1 << 16) - 1);
            USHORT* pClearColor16bit = reinterpret_cast(ClearColor);
            pClearColor16bit[0] = FloatTo(color[0], maxValue); // Y0 -> R16
            pClearColor16bit[1] = FloatTo(color[1], maxValue); // U0 -> G16
            pClearColor16bit[2] = FloatTo(color[0], maxValue); // Y1 -> B16
            pClearColor16bit[3] = FloatTo(color[2], maxValue); // V0 -> A16
            break;
        }
        case DXGI_FORMAT_Y416: // 16bit 4:4:4
        {
            assert(CD3D11FormatHelper::GetByteAlignment(Footprint.Format) == 8);
            USHORT* pClearColor16bit = reinterpret_cast(ClearColor);
            pClearColor16bit[0] = FloatTo(color[1]); // U -> R16
            pClearColor16bit[1] = FloatTo(color[0]); // Y -> G16
            pClearColor16bit[2] = FloatTo(color[2]); // V -> B16
            pClearColor16bit[3] = FloatTo(color[3]); // A -> A16
            break;
        }
        case DXGI_FORMAT_Y410: // 10bit 4:4:4, packed into R10G10B10A2
        {
            assert(CD3D11FormatHelper::GetByteAlignment(Footprint.Format) == 4);
            const UINT maxValue = (1 << 10) - 1;
            UINT ClearColor1010102 = (FloatTo(color[1], maxValue)) | // U -> R10
                                     (FloatTo(color[0], maxValue) << 10) | // Y -> G10
                                     (FloatTo(color[2], maxValue) << 20) | // V -> B10
                                     (FloatTo(color[3], 3) << 30); // A -> A2
            *reinterpret_cast(ClearColor) = ClearColor1010102;
            break;
        }
        default:
            assert(false);
            return; // No-op the clear.
        }

        // Build a subset that selects exactly the mip / array slice of the subresource being cleared.
        UINT Mip = 0, ArraySlice = 0, PlaneSlice = 0;
        pResource->DecomposeSubresource(Subresource, Mip, ArraySlice, PlaneSlice);
        CSubresourceSubset SingleSubresourceSubset(1, 1, 1, (UINT8)Mip, (UINT16)ArraySlice);
        // One UpdateSubresources call per rect, each passing the packed texel built above.
        for (UINT i = 0; i < numCopyRects; ++i)
        {
            D3D12_BOX srcBox = CD3DX12_BOX(pCopyRects[i].left, pCopyRects[i].top, pCopyRects[i].right, pCopyRects[i].bottom);
            UpdateSubresources(pResource, SingleSubresourceSubset, nullptr, &srcBox, UpdateSubresourcesFlags::ScenarioImmediateContext, ClearColor);
        }
    }
    else
    {
        // The target resource does not support render target, but the format can support render target.
        // Create a temporary resource to clear, and copy to the target resource.
        // The temporary resource has a max size and tiled copies are performed if needed.

        // Get the 64K tile shape and scale it up to about 1MB. This is done to compensate for
        // command submission overhead and can be tuned.
        D3D11_TILE_SHAPE tileShape = {};
        CD3D11FormatHelper::GetTileShape(&tileShape, clearFormat, D3D11_RESOURCE_DIMENSION_TEXTURE2D, 1);
        tileShape.WidthInTexels *= 4;
        tileShape.HeightInTexels *= 4;

        // AYUV is the only format handled specially here: it is viewed as R8G8B8A8_UINT with the
        // channels reordered. Every other format keeps viewFormat UNKNOWN and the color unchanged.
        DXGI_FORMAT viewFormat = DXGI_FORMAT_UNKNOWN;
        FLOAT ClearColor[4] = { color[0], color[1], color[2], color[3] };
        switch (clearFormat)
        {
        case DXGI_FORMAT_AYUV:
            viewFormat = DXGI_FORMAT_R8G8B8A8_UINT;
            ClearColor[0] = color[2]; // V8 -> R8
            ClearColor[1] = color[1]; // U8 -> G8
            ClearColor[2] = color[0]; // Y8 -> B8
            ClearColor[3] = color[3]; // A8 -> A8
            break;
        }

        // Request a resource with the tile size.
        auto& CacheEntry = GetResourceCache().GetResource(clearFormat, tileShape.WidthInTexels, tileShape.HeightInTexels, viewFormat);

        // The resource cache returns a resource that is the requested size or larger.
        tileShape.WidthInTexels = CacheEntry.m_Resource->AppDesc()->Width();
        tileShape.HeightInTexels = CacheEntry.m_Resource->AppDesc()->Height();

        // Find the largest rectangle that must be cleared.
        D3D12_RECT clearRect = pCopyRects[0];
        for (UINT i = 1; i < numCopyRects; ++i)
        {
            UnionRect(&clearRect, &clearRect, &pCopyRects[i]);
        }

        // Translate that rectangle to 0,0
        clearRect.right = clearRect.right - clearRect.left;
        clearRect.left = 0;
        clearRect.bottom = clearRect.bottom - clearRect.top;
        clearRect.top = 0;

        // Find the minimum region to clear in the tile. This is smaller than the full tile when
        // the destination rectangles are smaller than the tile size.
        {
            D3D12_RECT clearResourceRect = CD3DX12_RECT(0, 0, tileShape.WidthInTexels, tileShape.HeightInTexels);
            IntersectRect(&clearRect, &clearRect, &clearResourceRect);
        }

        // Clear the region needed in the allocated resource.
        ClearRenderTargetView(CacheEntry.m_RTV.get(), ClearColor, 1, &clearRect);

        // Loop over the rects to clear the region.
        for (UINT i = 0; i < numCopyRects; ++i)
        {
            // copyRect is the part of the destination rect still to be filled;
            // its top/left advance as tiles are copied.
            D3D12_RECT copyRect = pCopyRects[i];

            // Loop over tile rows of the source.
            while (copyRect.top < copyRect.bottom)
            {
                // Construct the source rect to copy. Clamp to account for the bottom edge of the destination.
                D3D12_RECT srcTileCopyRect = CD3DX12_RECT(clearRect.left, clearRect.top, 0, 0);
                srcTileCopyRect.bottom = srcTileCopyRect.top + std::min(clearRect.bottom - clearRect.top, copyRect.bottom - copyRect.top);

                // Restart at the left edge of the destination rect for every row.
                copyRect.left = pCopyRects[i].left;

                // Loop over tile columns of this row.
                while (copyRect.left < copyRect.right)
                {
                    // Clamp to the right edge of the destination.
                    srcTileCopyRect.right = srcTileCopyRect.left + std::min(clearRect.right - clearRect.left, copyRect.right - copyRect.left);

                    // Copy to the target to clear it.
                    D3D12_BOX srcBox = CD3DX12_BOX(srcTileCopyRect.left, srcTileCopyRect.top, srcTileCopyRect.right, srcTileCopyRect.bottom);
                    ResourceCopyRegion(
                        pResource,
                        Subresource,
                        copyRect.left,
                        copyRect.top,
                        0,
                        CacheEntry.m_Resource.get(),
                        0,
                        &srcBox);

                    // Advance to the next tile column.
                    copyRect.left += srcTileCopyRect.right - srcTileCopyRect.left;
                }

                // Advance to the next row.
                copyRect.top += srcTileCopyRect.bottom - srcTileCopyRect.top;
            }
        }
    }
}

// Clears every subresource covered by a view whose resource has no render target, by calling
// ClearResourceWithNoRenderTarget once per subresource inside a single PreRender/PostRender pair.
// 'color' and 'pRects' are passed through unchanged; the first subresource of the view is passed
// as the base subresource.
template void ImmediateContext::ClearViewWithNoRenderTarget(View* pView, CONST FLOAT color[4], UINT NumRects, const D3D12_RECT *pRects)
{
    PreRender(COMMAND_LIST_TYPE::GRAPHICS);

    Resource *pResource = pView->m_pResource;
    for (auto range : pView->m_subresources)
    {
        // Each range is iterated as the half-open interval [first, second).
        for (UINT subresource = range.first; subresource < range.second; ++subresource)
        {
            DXGI_FORMAT format = pResource->GetSubresourcePlacement(subresource).Footprint.Format;
            // We should always have a full type at this point, since the resource cache will fail to create RTVs otherwise.
            // We should only get this far on fully typed resources, but we'll need to patch up planar resources,
            // as the footprint ends up with a typeless format for each plane. Currently we only support 8 and 16 bit
            // planar, with one or two channels per plane.
            switch (format)
            {
            case DXGI_FORMAT_R8_TYPELESS:
                format = DXGI_FORMAT_R8_UINT;
                break;
            case DXGI_FORMAT_R8G8_TYPELESS:
                format = DXGI_FORMAT_R8G8_UINT;
                break;
            case DXGI_FORMAT_R16_TYPELESS:
                format = DXGI_FORMAT_R16_UINT;
                break;
            case DXGI_FORMAT_R16G16_TYPELESS:
                format = DXGI_FORMAT_R16G16_UINT;
                break;
            }
            assert(format == pResource->AppDesc()->Format() || CD3D11FormatHelper::Planar(pResource->AppDesc()->Format()));
            ClearResourceWithNoRenderTarget(pResource, color, NumRects, pRects, subresource, pView->m_subresources.begin().StartSubresource(), format);
        }
    }

    PostRender(COMMAND_LIST_TYPE::GRAPHICS);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Clears a video decoder output view; forwards to ClearViewWithNoRenderTarget.
void TRANSLATION_API ImmediateContext::ClearVideoDecoderOutputView(VDOV *pVDOV, CONST FLOAT color[4], UINT NumRects, const D3D12_RECT *pRects)
{
    ClearViewWithNoRenderTarget(pVDOV, color, NumRects, pRects);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Clears a video processor input view; forwards to ClearViewWithNoRenderTarget.
void TRANSLATION_API ImmediateContext::ClearVideoProcessorInputView(VPIV *pVPIV, CONST FLOAT color[4], UINT NumRects, const D3D12_RECT *pRects)
{
    ClearViewWithNoRenderTarget(pVPIV, color, NumRects, pRects);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Clears a video processor output view; forwards to ClearViewWithNoRenderTarget.
void TRANSLATION_API ImmediateContext::ClearVideoProcessorOutputView(VPOV *pVPOV, CONST FLOAT color[4], UINT NumRects, const D3D12_RECT *pRects)
{
    ClearViewWithNoRenderTarget(pVPOV, color, NumRects, pRects);
}
//---------------------------------------------------------------------------------------------------------------------------------- // Note: no resource transitions occur as a result of discard void TRANSLATION_API ImmediateContext::DiscardView(ViewBase* pView, const D3D12_RECT* pRects, UINT NumRects) { UINT commandListTypeMask = pView->m_pResource->GetCommandListTypeMask(pView->m_subresources); if (commandListTypeMask == COMMAND_LIST_TYPE_UNKNOWN_MASK) { // TODO: output a no-op msg for this case return; } if (pView->m_pResource->Parent()->ResourceDimension12() != D3D12_RESOURCE_DIMENSION_TEXTURE2D && NumRects) { // D3D12 will treat this as invalid // Since this call is just a hint anyway, drop the call return; } // D3D12 requires RenderTargets and DepthStenciles to be transitioned to the corresponding write state before discard. auto pAPIResource = GetUnderlyingResource(pView->m_pResource); D3D12_RESOURCE_FLAGS ResourceFlags = pAPIResource->GetDesc().Flags; if ((ResourceFlags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) || (ResourceFlags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) { D3D12_RESOURCE_STATES RequiredState = (ResourceFlags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? D3D12_RESOURCE_STATE_RENDER_TARGET : D3D12_RESOURCE_STATE_DEPTH_WRITE; TransitionResourceForView(pView, RequiredState); m_ResourceStateManager.ApplyAllResourceTransitions(); } // TODO: Tokenize Discard operations and perform them just-in-time before future render ops // to ensure they happen on the same command list as the one doing the op. 
bool allSubresourcesSame = IsSingleCommandListType(commandListTypeMask); for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++) { if (commandListTypeMask & (1 << i)) { DiscardViewImpl((COMMAND_LIST_TYPE)i, pView, pRects, NumRects, allSubresourcesSame); } } } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::DiscardViewImpl(COMMAND_LIST_TYPE commandListType, ViewBase* pView, const D3D12_RECT* pRects, UINT NumRects, bool allSubresourcesSame) { PreRender(commandListType); D3D12_DISCARD_REGION Desc; Desc.pRects = reinterpret_cast(pRects); Desc.NumRects = NumRects; auto pAPIResource = GetUnderlyingResource(pView->m_pResource); pView->m_pResource->UsedInCommandList(commandListType, GetCommandListID(commandListType)); static_assert(static_cast(COMMAND_LIST_TYPE::MAX_VALID) == 3u, "ImmediateContext::DiscardView must support all command list types."); auto pfnDiscardResource = [&]() { switch (commandListType) { case COMMAND_LIST_TYPE::GRAPHICS: GetGraphicsCommandList()->DiscardResource(pAPIResource, &Desc); break; case COMMAND_LIST_TYPE::VIDEO_DECODE: GetVideoDecodeCommandList()->DiscardResource(pAPIResource, &Desc); break; case COMMAND_LIST_TYPE::VIDEO_PROCESS: GetVideoProcessCommandList()->DiscardResource(pAPIResource, &Desc); break; } }; if (allSubresourcesSame) { for (auto range : pView->m_subresources) { Desc.FirstSubresource = range.first; Desc.NumSubresources = range.second - range.first; pfnDiscardResource(); } } else { // need to discard on a per-subresource basis Desc.NumSubresources = 1; for (auto range : pView->m_subresources) { for (UINT subResource = range.first; subResource < range.second; subResource++) { if (pView->m_pResource->GetCommandListTypeMask(subResource) & (UINT)commandListType) { Desc.FirstSubresource = subResource; pfnDiscardResource(); } } } } PostRender(commandListType); } 
//---------------------------------------------------------------------------------------------------------------------------------- // Note: no resource transitions occur as a result of discard void TRANSLATION_API ImmediateContext::DiscardResource(Resource* pResource, const D3D12_RECT* pRects, UINT NumRects) { UINT commandListTypeMask = pResource->GetCommandListTypeMask(); if (commandListTypeMask == COMMAND_LIST_TYPE_UNKNOWN_MASK) { // TODO: output a no-op msg for this case return; } if (pResource->Parent()->ResourceDimension12() != D3D12_RESOURCE_DIMENSION_TEXTURE2D && NumRects) { // D3D12 will treat this as invalid // Since this call is just a hint anyway, drop the call return; } auto pAPIResource = GetUnderlyingResource(pResource); // D3D12 requires RenderTargets and DepthStenciles to be transitioned to the corresponding write state before discard. D3D12_RESOURCE_FLAGS ResourceFlags = pAPIResource->GetDesc().Flags; if ((ResourceFlags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) || (ResourceFlags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) { D3D12_RESOURCE_STATES RequiredState = (ResourceFlags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? D3D12_RESOURCE_STATE_RENDER_TARGET : D3D12_RESOURCE_STATE_DEPTH_WRITE; m_ResourceStateManager.TransitionResource(pResource, RequiredState); m_ResourceStateManager.ApplyAllResourceTransitions(); } // TODO: Tokenize Discard operations and perform them just-in-time before future render ops // to ensure they happen on the same command list as the one doing the op. 
bool allSubresourcesSame = IsSingleCommandListType(commandListTypeMask); for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++) { if (commandListTypeMask & (1 << i)) { DiscardResourceImpl((COMMAND_LIST_TYPE)i, pResource, pRects, NumRects, allSubresourcesSame); } } } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::DiscardResourceImpl(COMMAND_LIST_TYPE commandListType, Resource* pResource, const D3D12_RECT* pRects, UINT NumRects, bool allSubresourcesSame) { PreRender(commandListType); D3D12_DISCARD_REGION Desc; Desc.pRects = reinterpret_cast(pRects); Desc.NumRects = NumRects; auto pAPIResource = GetUnderlyingResource(pResource); static_assert(static_cast(COMMAND_LIST_TYPE::MAX_VALID) == 3u, "ImmediateContext::DiscardResource must support all command list types."); auto pfnDiscardResource = [&]() { switch (commandListType) { case COMMAND_LIST_TYPE::GRAPHICS: GetGraphicsCommandList()->DiscardResource(pAPIResource, &Desc); break; case COMMAND_LIST_TYPE::VIDEO_DECODE: GetVideoDecodeCommandList()->DiscardResource(pAPIResource, &Desc); break; case COMMAND_LIST_TYPE::VIDEO_PROCESS: GetVideoProcessCommandList()->DiscardResource(pAPIResource, &Desc); break; } }; if (allSubresourcesSame) { Desc.FirstSubresource = 0; Desc.NumSubresources = pResource->NumSubresources(); pfnDiscardResource(); } else { Desc.NumSubresources = 1; for (UINT subResource = 0; subResource < pResource->NumSubresources(); subResource++) { if (pResource->GetCommandListTypeMask(subResource) & (UINT)commandListType) { Desc.FirstSubresource = subResource; pfnDiscardResource(); } } } PostRender(commandListType); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::EnsureInternalUAVRootSig() noexcept(false) { if (!m_InternalUAVRootSig.Created()) { CD3DX12_DESCRIPTOR_RANGE1 UAVSlot; 
UAVSlot.Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0); CD3DX12_ROOT_PARAMETER1 RootParams[2]; RootParams[0].InitAsDescriptorTable(1, &UAVSlot); RootParams[1].InitAsConstants(NUM_UAV_ROOT_SIG_CONSTANTS, 0); CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC RootSigDesc(2, RootParams); m_InternalUAVRootSig.Create(RootSigDesc); } } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::EnsureDrawAutoResources() noexcept(false) { EnsureInternalUAVRootSig(); // throw (_com_error); if (!m_pDrawAutoPSO) { D3D12_COMPUTE_PIPELINE_STATE_DESC PSODesc; ZeroMemory(&PSODesc, sizeof(PSODesc)); PSODesc.pRootSignature = m_InternalUAVRootSig.GetRootSignature(); PSODesc.CS.pShaderBytecode = g_DrawAutoCS; PSODesc.CS.BytecodeLength = sizeof(g_DrawAutoCS); PSODesc.NodeMask = GetNodeMask(); HRESULT hr = m_pDevice12->CreateComputePipelineState(&PSODesc, IID_PPV_ARGS(&m_pDrawAutoPSO)); ThrowFailure(hr); // throw( _com_error ) } } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::EnsureQueryResources() noexcept(false) { EnsureInternalUAVRootSig(); // throw (_com_error); if (!m_pFormatQueryPSO) { D3D12_COMPUTE_PIPELINE_STATE_DESC PSODesc; ZeroMemory(&PSODesc, sizeof(PSODesc)); PSODesc.pRootSignature = m_InternalUAVRootSig.GetRootSignature(); PSODesc.CS.pShaderBytecode = g_FormatQueryCS; PSODesc.CS.BytecodeLength = sizeof(g_FormatQueryCS); PSODesc.NodeMask = GetNodeMask(); HRESULT hr = m_pDevice12->CreateComputePipelineState(&PSODesc, IID_PPV_ARGS(&m_pFormatQueryPSO)); ThrowFailure(hr); // throw( _com_error ) } if (!m_pAccumulateQueryPSO) { D3D12_COMPUTE_PIPELINE_STATE_DESC PSODesc; ZeroMemory(&PSODesc, sizeof(PSODesc)); PSODesc.pRootSignature = m_InternalUAVRootSig.GetRootSignature(); PSODesc.CS.pShaderBytecode = g_AccumulateQueryCS; PSODesc.CS.BytecodeLength = sizeof(g_AccumulateQueryCS); 
PSODesc.NodeMask = GetNodeMask(); HRESULT hr = m_pDevice12->CreateComputePipelineState(&PSODesc, IID_PPV_ARGS(&m_pAccumulateQueryPSO)); ThrowFailure(hr); // throw( _com_error ) } } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::EnsureExecuteIndirectResources() noexcept(false) { HRESULT hr = S_OK; if (!m_pDrawInstancedCommandSignature) { D3D12_INDIRECT_ARGUMENT_DESC IndirectArg = {}; IndirectArg.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW; D3D12_COMMAND_SIGNATURE_DESC CommandSignatureDesc = {}; CommandSignatureDesc.ByteStride = sizeof(D3D12_DRAW_ARGUMENTS); CommandSignatureDesc.NumArgumentDescs = 1; CommandSignatureDesc.pArgumentDescs = &IndirectArg; CommandSignatureDesc.NodeMask = GetNodeMask(); hr = m_pDevice12->CreateCommandSignature( &CommandSignatureDesc, nullptr, IID_PPV_ARGS(&m_pDrawInstancedCommandSignature) ); ThrowFailure(hr); } if (!m_pDrawIndexedInstancedCommandSignature) { D3D12_INDIRECT_ARGUMENT_DESC IndirectArg = {}; IndirectArg.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED; D3D12_COMMAND_SIGNATURE_DESC CommandSignatureDesc = {}; CommandSignatureDesc.ByteStride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS); CommandSignatureDesc.NumArgumentDescs = 1; CommandSignatureDesc.pArgumentDescs = &IndirectArg; CommandSignatureDesc.NodeMask = GetNodeMask(); hr = m_pDevice12->CreateCommandSignature( &CommandSignatureDesc, nullptr, IID_PPV_ARGS(&m_pDrawIndexedInstancedCommandSignature) ); ThrowFailure(hr); } if (!m_pDispatchCommandSignature) { D3D12_INDIRECT_ARGUMENT_DESC IndirectArg = {}; IndirectArg.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH; D3D12_COMMAND_SIGNATURE_DESC CommandSignatureDesc = {}; CommandSignatureDesc.ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS); CommandSignatureDesc.NumArgumentDescs = 1; CommandSignatureDesc.pArgumentDescs = &IndirectArg; CommandSignatureDesc.NodeMask = GetNodeMask(); hr = m_pDevice12->CreateCommandSignature( 
&CommandSignatureDesc, nullptr, IID_PPV_ARGS(&m_pDispatchCommandSignature) ); ThrowFailure(hr); } } //---------------------------------------------------------------------------------------------------------------------------------- ID3D12PipelineState* ImmediateContext::PrepareGenerateMipsObjects(DXGI_FORMAT Format, D3D12_RESOURCE_DIMENSION Dimension) noexcept(false) { MipGenKey Key( Format, Dimension ); auto iter = m_pGenerateMipsPSOMap.find(Key); if (iter != m_pGenerateMipsPSOMap.end()) { return iter->second.get(); } HRESULT hr = S_OK; if (!m_GenerateMipsRootSig.Created()) { // GenMips uses a custom RootSig to allow binding constants without needing to burn heap space for constant buffers // Total bindings: One SRV (the GenMips SRV), two root constants, and one sampler D3D12_SAMPLER_DESC SamplerDesc{}; SamplerDesc.MinLOD = 0; SamplerDesc.MaxLOD = 9999.0f; SamplerDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; SamplerDesc.MipLODBias = 0; SamplerDesc.MaxAnisotropy = 1; SamplerDesc.AddressU = SamplerDesc.AddressV = SamplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; auto pfnInitializeSampler = [=](D3D12_FILTER_TYPE FilterType, D3D12_FILTER SamplerFilter, D3D12_SAMPLER_DESC& SamplerDesc) { SamplerDesc.Filter = SamplerFilter; m_GenerateMipsSamplers[FilterType] = m_SamplerAllocator.AllocateHeapSlot(); // throw( _com_error ) m_pDevice12->CreateSampler(&SamplerDesc, m_GenerateMipsSamplers[FilterType]); }; pfnInitializeSampler(D3D12_FILTER_TYPE_LINEAR, D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT, SamplerDesc); pfnInitializeSampler(D3D12_FILTER_TYPE_POINT, D3D12_FILTER_MIN_MAG_MIP_POINT, SamplerDesc); CD3DX12_DESCRIPTOR_RANGE1 SRVSlot; SRVSlot.Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0, 0, D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE); CD3DX12_DESCRIPTOR_RANGE1 SamplerSlot; SamplerSlot.Init(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 1, 0); CD3DX12_ROOT_PARAMETER1 RootParams[3]; RootParams[GenerateMipsRootSignatureSlots::eSRV].InitAsDescriptorTable(1, &SRVSlot); 
RootParams[GenerateMipsRootSignatureSlots::eRootConstants].InitAsConstants(3, 0); RootParams[GenerateMipsRootSignatureSlots::eSampler].InitAsDescriptorTable(1, &SamplerSlot); CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC RootSigDesc(_countof(RootParams), RootParams); m_GenerateMipsRootSig.Create(RootSigDesc); } D3D12_GRAPHICS_PIPELINE_STATE_DESC PSODesc = {}; PSODesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); PSODesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); PSODesc.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT); PSODesc.DepthStencilState.DepthEnable = FALSE; PSODesc.pRootSignature = m_GenerateMipsRootSig.GetRootSignature(); PSODesc.VS = { g_GenMipsVS, sizeof(g_GenMipsVS) }; switch(Dimension) { case D3D12_RESOURCE_DIMENSION_TEXTURE1D: PSODesc.PS = { g_GenMipsPS1D, sizeof(g_GenMipsPS1D) }; break; case D3D12_RESOURCE_DIMENSION_TEXTURE2D: PSODesc.PS = { g_GenMipsPS2D, sizeof(g_GenMipsPS2D) }; break; case D3D12_RESOURCE_DIMENSION_TEXTURE3D: PSODesc.PS = { g_GenMipsPS3D, sizeof(g_GenMipsPS3D) }; break; default: ASSUME(false); } PSODesc.DSVFormat = DXGI_FORMAT_UNKNOWN; PSODesc.RTVFormats[0] = Format; PSODesc.NumRenderTargets = 1; PSODesc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF; PSODesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; PSODesc.SampleDesc.Count = 1; PSODesc.SampleMask = 0xffffffff; PSODesc.NodeMask = GetNodeMask(); unique_comptr spPSO; hr = m_pDevice12->CreateGraphicsPipelineState(&PSODesc, IID_PPV_ARGS(&spPSO)); ThrowFailure(hr); // throw( _com_error ) auto insertRet = m_pGenerateMipsPSOMap.emplace(Key, std::move(spPSO)); iter = insertRet.first; return iter->second.get(); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::GenMips( SRV *pSRV, D3D12_FILTER_TYPE FilterType) { #ifdef USE_PIX PIXScopedEvent(GetGraphicsCommandList(), 0ull, L"GenerateMips"); #endif 
PreRender(COMMAND_LIST_TYPE::GRAPHICS); auto pResource = pSRV->m_pResource; // GenerateMips is deprecated in D3D12 // It is implemented in 11on12 via draws which average the pixels of the higher mips into the pixels of the lower mips D3D12_SHADER_RESOURCE_VIEW_DESC SRVDesc = pSRV->GetDesc12(); D3D12_RENDER_TARGET_VIEW_DESC RTVDesc; RTVDesc.Format = SRVDesc.Format; // Prep RTV desc with properties that are common across all mips // Not using arrayed RTVs because that would require a geometry shader switch(SRVDesc.ViewDimension) { case D3D12_SRV_DIMENSION_TEXTURE1D: RTVDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1D; break; case D3D12_SRV_DIMENSION_TEXTURE1DARRAY: RTVDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1DARRAY; RTVDesc.Texture1DArray.ArraySize = 1; break; case D3D12_SRV_DIMENSION_TEXTURE2D: RTVDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; RTVDesc.Texture2D.PlaneSlice = 0; // Only non-planar surfaces support mipmaps. break; case D3D12_SRV_DIMENSION_TEXTURE2DARRAY: case D3D12_SRV_DIMENSION_TEXTURECUBE: case D3D12_SRV_DIMENSION_TEXTURECUBEARRAY: RTVDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY; RTVDesc.Texture2DArray.ArraySize = 1; RTVDesc.Texture2DArray.PlaneSlice = 0; // Only non-planar surfaces support mipmaps. 
break; case D3D12_SRV_DIMENSION_TEXTURE3D: RTVDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; RTVDesc.Texture3D.WSize = 1; break; default: ASSUME(false); } // Retrieve the appropriate PSO // The shaders used for the operation vary depending on resource dimension, // and the render target format must be baked into the PSO as well D3D12_RESOURCE_DIMENSION Dimension = pResource->Parent()->ResourceDimension12(); DXGI_FORMAT Format = pSRV->GetDesc12().Format; ID3D12PipelineState* pGenMipsPSO = PrepareGenerateMipsObjects(Format, Dimension); // throw( _com_error, bad_alloc ) UINT ResMipLevels = pResource->AppDesc()->MipLevels(); // If there's no room for the SRV descriptor in the online heap, re-create it before applying state to the command list UINT ViewHeapSlot = ReserveSlots(m_ViewHeap, 1); // throw( _com_error ) D3D12_GPU_DESCRIPTOR_HANDLE GPUDescriptor = m_ViewHeap.GPUHandle(ViewHeapSlot); D3D12_CPU_DESCRIPTOR_HANDLE CPUDescriptor = m_ViewHeap.CPUHandle(ViewHeapSlot); UINT SamplerHeapSlot = ReserveSlots(m_SamplerHeap, 1); D3D12_GPU_DESCRIPTOR_HANDLE GPUSamplerDescriptor = m_SamplerHeap.GPUHandle(SamplerHeapSlot); D3D12_CPU_DESCRIPTOR_HANDLE CPUSamplerDescriptor = m_SamplerHeap.CPUHandle(SamplerHeapSlot); #if DBG ID3D12GraphicsCommandList* pSnapshotCmdList = GetGraphicsCommandList(); #endif // All state that is applied for this operation must be overridden by the app's state on the next draw // Set the reassert bits to ensure that the next draw does the right state binding GetGraphicsCommandList()->SetPipelineState(pGenMipsPSO); m_StatesToReassert |= e_PipelineStateDirty; GetGraphicsCommandList()->SetGraphicsRootSignature(m_GenerateMipsRootSig.GetRootSignature()); m_StatesToReassert |= e_GraphicsRootSignatureDirty; GetGraphicsCommandList()->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); m_StatesToReassert |= e_PrimitiveTopologyDirty; { // Unbind all VBs D3D12_VERTEX_BUFFER_VIEW VBVArray[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; memset(VBVArray, 
0, sizeof(VBVArray)); GetGraphicsCommandList()->IASetVertexBuffers(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT, VBVArray); m_StatesToReassert |= e_VertexBuffersDirty; } // Bind SRV if (SRVDesc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE || SRVDesc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBEARRAY) { UINT NumCubes = 1; UINT MipLevels = 0; UINT FirstSlice = 0; UINT FirstMip = 0; if (SRVDesc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURECUBE) { MipLevels = SRVDesc.TextureCube.MipLevels; FirstMip = SRVDesc.TextureCube.MostDetailedMip; } else { MipLevels = SRVDesc.TextureCubeArray.MipLevels; FirstMip = SRVDesc.TextureCubeArray.MostDetailedMip; FirstSlice = SRVDesc.TextureCubeArray.First2DArrayFace; NumCubes = SRVDesc.TextureCubeArray.NumCubes; } SRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; SRVDesc.Texture2DArray.MostDetailedMip = FirstMip; SRVDesc.Texture2DArray.MipLevels = MipLevels; SRVDesc.Texture2DArray.FirstArraySlice = FirstSlice; SRVDesc.Texture2DArray.ArraySize = NumCubes * 6; SRVDesc.Texture2DArray.PlaneSlice = 0; SRVDesc.Texture2DArray.ResourceMinLODClamp = 0.0f; m_pDevice12->CreateShaderResourceView( pResource->GetUnderlyingResource(), &SRVDesc, CPUDescriptor ); } else { m_pDevice12->CopyDescriptorsSimple( 1, CPUDescriptor, pSRV->GetRefreshedDescriptorHandle(), m_ViewHeap.m_Desc.Type ); } GetGraphicsCommandList()->SetGraphicsRootDescriptorTable(GenerateMipsRootSignatureSlots::eSRV, GPUDescriptor); // Bind Sampler m_pDevice12->CopyDescriptorsSimple(1, CPUSamplerDescriptor, m_GenerateMipsSamplers[FilterType], m_SamplerHeap.m_Desc.Type); GetGraphicsCommandList()->SetGraphicsRootDescriptorTable(GenerateMipsRootSignatureSlots::eSampler, GPUSamplerDescriptor); // Get RTV descriptor UINT RTVDescriptorHeapIndex; D3D12_CPU_DESCRIPTOR_HANDLE RTVDescriptor = m_RTVAllocator.AllocateHeapSlot(&RTVDescriptorHeapIndex); // throw( _com_error ) // For each contiguous range of subresources contained in the view for(auto subresourceRange : 
pSRV->m_subresources) { UINT start = subresourceRange.first; UINT end = subresourceRange.second; UINT minMip = start % ResMipLevels; UINT minSlice = start / ResMipLevels; for (UINT subresource = start + 1; subresource < end; ++subresource) { UINT mipLevel = subresource % ResMipLevels; if (mipLevel == 0) { // The view contains all the mips of some slices of an arrayed resource, so the outer loop includes multiple mip 0s assert(start % ResMipLevels == 0 && end % ResMipLevels == 0); continue; } UINT arraySlice = subresource / ResMipLevels; m_ResourceStateManager.TransitionSubresource(pResource, subresource - 1, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); m_ResourceStateManager.TransitionSubresource(pResource, subresource, D3D12_RESOURCE_STATE_RENDER_TARGET); m_ResourceStateManager.ApplyAllResourceTransitions(); auto& Placement = pResource->GetSubresourcePlacement(subresource); D3D12_VIEWPORT Viewport = { 0, 0, (FLOAT)Placement.Footprint.Width, (FLOAT)Placement.Footprint.Height, 0, 1 }; D3D12_RECT Scissor = { 0, 0, (LONG)Placement.Footprint.Width, (LONG)Placement.Footprint.Height }; GetGraphicsCommandList()->RSSetViewports(1, &Viewport); GetGraphicsCommandList()->RSSetScissorRects(1, &Scissor); bool b3D = (Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D); UINT numIterations = b3D ? Placement.Footprint.Depth : 1; for (UINT wSlice = 0; wSlice < numIterations; ++wSlice) { UINT arrayOrWSlice = b3D ? 
wSlice * 2 : arraySlice; switch(RTVDesc.ViewDimension) { case D3D12_RTV_DIMENSION_TEXTURE1D: RTVDesc.Texture1D.MipSlice = mipLevel; break; case D3D12_RTV_DIMENSION_TEXTURE1DARRAY: RTVDesc.Texture1DArray.MipSlice = mipLevel; RTVDesc.Texture1DArray.FirstArraySlice = arraySlice; break; case D3D12_RTV_DIMENSION_TEXTURE2D: RTVDesc.Texture2D.MipSlice = mipLevel; break; case D3D12_RTV_DIMENSION_TEXTURE2DARRAY: RTVDesc.Texture2DArray.MipSlice = mipLevel; RTVDesc.Texture2DArray.FirstArraySlice = arraySlice; break; case D3D12_RTV_DIMENSION_TEXTURE3D: assert(minSlice == 0); RTVDesc.Texture3D.MipSlice = mipLevel; RTVDesc.Texture3D.FirstWSlice = wSlice; break; default: ASSUME(false); } m_pDevice12->CreateRenderTargetView(pResource->GetUnderlyingResource(), &RTVDesc, RTVDescriptor); GetGraphicsCommandList()->OMSetRenderTargets(1, &RTVDescriptor, false, nullptr); GetGraphicsCommandList()->SetGraphicsRoot32BitConstant(GenerateMipsRootSignatureSlots::eRootConstants, mipLevel - 1 - minMip, 0); GetGraphicsCommandList()->SetGraphicsRoot32BitConstant(GenerateMipsRootSignatureSlots::eRootConstants, arrayOrWSlice - minSlice, 1); if (b3D) { // We calculate the w value here for 3D textures to avoid requiring the shader to calculate it // every pixel. 
Add a half point to the wSlice to make sure we sample from the two relevant // z planes equally float wVal = (wSlice + .5f) / numIterations; GetGraphicsCommandList()->SetGraphicsRoot32BitConstant(GenerateMipsRootSignatureSlots::eRootConstants, *(UINT*) (&wVal), 2); } GetGraphicsCommandList()->DrawInstanced(4, 1, 0, 0); } } } // Free RTV descriptor m_RTVAllocator.FreeHeapSlot(RTVDescriptor, RTVDescriptorHeapIndex); // Dirty bits for multiply-updated states m_StatesToReassert |= e_RenderTargetsDirty; m_StatesToReassert |= e_ViewportsDirty; m_StatesToReassert |= e_ScissorRectsDirty; // Ensure that there were no flushes during the GenMips process // Since the state was applied directly to the command list, it will NOT be re-applied to a new command list #if DBG // Required for OACR assert(GetGraphicsCommandList() == pSnapshotCmdList); #endif PostRender(COMMAND_LIST_TYPE::GRAPHICS); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::UAVBarrier() noexcept { D3D12_RESOURCE_BARRIER BarrierDesc; ZeroMemory(&BarrierDesc, sizeof(BarrierDesc)); BarrierDesc.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; GetGraphicsCommandList()->ResourceBarrier(1, &BarrierDesc); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::CopyAndConvertSubresourceRegion(Resource* pDst, UINT DstSubresource, Resource* pSrc, UINT SrcSubresource, UINT dstX, UINT dstY, UINT dstZ, const D3D12_BOX* pSrcBox) noexcept { assert(!Resource::IsSameUnderlyingSubresource(pSrc, SrcSubresource, pDst, DstSubresource)); struct CopyDesc { Resource* pResource; D3D12_TEXTURE_COPY_LOCATION View; } Descs[2]; Descs[0].pResource = pSrc; Descs[0].View.SubresourceIndex = SrcSubresource; Descs[0].View.pResource = pSrc->GetUnderlyingResource(); Descs[1].pResource = pDst; Descs[1].View.SubresourceIndex = DstSubresource; 
Descs[1].View.pResource = pDst->GetUnderlyingResource();

    // Format of whichever side is addressed by subresource index; stays UNKNOWN when neither is.
    DXGI_FORMAT DefaultResourceFormat = DXGI_FORMAT_UNKNOWN;

    // A null source box means "the whole subresource": substitute a box built from the resource.
    D3D12_BOX PatchedBox = {};
    if (!pSrcBox)
    {
        PatchedBox = GetBoxFromResource(pSrc, SrcSubresource);
        pSrcBox = &PatchedBox;
    }

    // Choose the copy-location type for each side (index 0 = source, 1 = destination).
    // Resources that own their underlying resource and are not placed textures are addressed by
    // subresource index; all others are addressed by placed footprint.
    for (UINT i = 0; i < 2; ++i)
    {
        if (Descs[i].pResource->m_Identity->m_bOwnsUnderlyingResource && !Descs[i].pResource->m_Identity->m_bPlacedTexture)
        {
            Descs[i].View.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
            DefaultResourceFormat = Descs[i].pResource->GetSubresourcePlacement(Descs[i].View.SubresourceIndex).Footprint.Format;
        }
        else
        {
            Descs[i].View.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
            Descs[i].View.PlacedFootprint = Descs[i].pResource->GetSubresourcePlacement(Descs[i].View.SubresourceIndex);
        }
    }

    bool bConvertSrc = false, bConvertDst = false;

    // Format reconciliation is only attempted when both resources report a single non-opaque plane.
    if (pSrc->AppDesc()->NonOpaquePlaneCount() == 1 && pDst->AppDesc()->NonOpaquePlaneCount() == 1)
    {
        if (DefaultResourceFormat == DXGI_FORMAT_UNKNOWN)
        {
            // No default resources, or two buffers, check if we have to convert one
            // NOTE(review): DefaultResourceFormat is taken from the destination placement here, and the
            // destination footprint was filled from that same placement above, so when bConvertDst is set
            // the "Format = DefaultResourceFormat" rewrite further down appears to leave the footprint
            // unchanged - confirm this is intended.
            DefaultResourceFormat = Descs[1].pResource->GetSubresourcePlacement(DstSubresource).Footprint.Format;
            bConvertDst = Descs[0].pResource->GetSubresourcePlacement(SrcSubresource).Footprint.Format != DefaultResourceFormat;
            if (!bConvertDst && DefaultResourceFormat == DXGI_FORMAT_UNKNOWN)
            {
                // This is a buffer to buffer copy
                // Special-case buffer to buffer copies
                assert(Descs[0].pResource->AppDesc()->ResourceDimension() == static_cast(D3D11_RESOURCE_DIMENSION_BUFFER) &&
                       Descs[1].pResource->AppDesc()->ResourceDimension() == static_cast(D3D11_RESOURCE_DIMENSION_BUFFER));

                // Only the box's left/right extent is used; dstX and both dynamic-buffer offsets
                // are applied as byte offsets into the underlying resources.
                UINT64 SrcOffset = pSrcBox->left + GetDynamicBufferOffset(pSrc);
                UINT64 Size = pSrcBox->right - pSrcBox->left;
                GetGraphicsCommandList()->CopyBufferRegion(pDst->GetUnderlyingResource(), dstX + GetDynamicBufferOffset(pDst), pSrc->GetUnderlyingResource(), SrcOffset, Size);
                return;
            }
        }
        else if (Descs[0].View.Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX && Descs[1].View.Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX)
        {
            // No conversion
        }
        else if (Descs[0].View.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT && Descs[0].pResource->GetSubresourcePlacement(SrcSubresource).Footprint.Format != DefaultResourceFormat)
        {
            // Source is the placed-footprint side and its format differs: reinterpret the source.
            assert(Descs[1].View.Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX);
            bConvertSrc = true;
        }
        else if (Descs[1].pResource->GetSubresourcePlacement(DstSubresource).Footprint.Format != DefaultResourceFormat)
        {
            // Destination is the placed-footprint side and its format differs: reinterpret the destination.
            assert(Descs[0].View.Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX);
            assert(Descs[1].View.Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT);
            bConvertDst = true;
        }
    }

    // Convert placement struct
    if (bConvertSrc || bConvertDst)
    {
        assert(pSrc->AppDesc()->NonOpaquePlaneCount() == 1 && pDst->AppDesc()->NonOpaquePlaneCount() == 1);

        UINT ConversionIndex = bConvertDst ? 1 : 0;
        // Index 0 holds the alignments of the footprint's current format, index 1 those of the
        // format it is rewritten to.
        UINT WidthAlignment[2], HeightAlignment[2];
        auto& Placement = Descs[ConversionIndex].View.PlacedFootprint.Footprint;
        DXGI_FORMAT& Format = Placement.Format;

        WidthAlignment[0] = CD3D11FormatHelper::GetWidthAlignment( Format );
        HeightAlignment[0] = CD3D11FormatHelper::GetHeightAlignment( Format );
        Format = DefaultResourceFormat;
        WidthAlignment[1] = CD3D11FormatHelper::GetWidthAlignment( Format );
        HeightAlignment[1] = CD3D11FormatHelper::GetHeightAlignment( Format );

        // Rescale the footprint extents by the ratio of the new alignment to the old one.
        Placement.Width = Placement.Width * WidthAlignment[1] / WidthAlignment[0];
        Placement.Height = Placement.Height * HeightAlignment[1] / HeightAlignment[0];

        // Convert coordinates/box
        if (bConvertSrc)
        {
            assert(pSrcBox);
            // Never modify the caller's box: copy it into PatchedBox first if it is not already ours.
            if (pSrcBox != &PatchedBox)
            {
                PatchedBox = *pSrcBox;
                pSrcBox = &PatchedBox;
            }
            PatchedBox.left = PatchedBox.left * WidthAlignment[1] / WidthAlignment[0];
            PatchedBox.right = PatchedBox.right * WidthAlignment[1] / WidthAlignment[0];
            PatchedBox.top = PatchedBox.top * HeightAlignment[1] / HeightAlignment[0];
            PatchedBox.bottom = PatchedBox.bottom * HeightAlignment[1] / HeightAlignment[0];
        }
        else if (bConvertDst)
        {
            dstX = dstX * WidthAlignment[1] / WidthAlignment[0];
            dstY = dstY * HeightAlignment[1] / HeightAlignment[0];
        }
    }

    // Actually issue the copy!
    GetGraphicsCommandList()->CopyTextureRegion(&Descs[1].View, dstX, dstY, dstZ, &Descs[0].View, pSrcBox);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Copies every subresource of pSrc to pDst. Both resources are asserted to have the same
// subresource count.
void TRANSLATION_API ImmediateContext::ResourceCopy(Resource* pDst, Resource* pSrc )
{
    PreRender(COMMAND_LIST_TYPE::GRAPHICS);

    assert(pSrc->NumSubresources() == pDst->NumSubresources());
    if (Resource::IsSameUnderlyingSubresource(pSrc, 0, pDst, 0))
    {
        // Source and destination alias the same underlying subresource: go through SameResourceCopy
        // one subresource at a time.
#ifdef USE_PIX
        PIXScopedEvent(GetGraphicsCommandList(), 0ull, L"Whole-resource copy");
#endif
        for (UINT Subresource = 0; Subresource < pSrc->NumSubresources(); ++Subresource)
        {
            SameResourceCopy(pDst, Subresource, pSrc, Subresource, 0, 0, 0, nullptr);
        }
    }
    else
    {
        m_ResourceStateManager.TransitionResource(pDst, D3D12_RESOURCE_STATE_COPY_DEST);
        m_ResourceStateManager.TransitionResource(pSrc, D3D12_RESOURCE_STATE_COPY_SOURCE);
        m_ResourceStateManager.ApplyAllResourceTransitions();

        // Note that row-major placed textures must not be passed to CopyResource, because their underlying
        // buffer's size does not include padding for alignment to pool sizes like for staging textures.
if ((pSrc->m_Identity->m_bOwnsUnderlyingResource && pDst->m_Identity->m_bOwnsUnderlyingResource && !pSrc->m_Identity->m_bPlacedTexture && !pDst->m_Identity->m_bPlacedTexture && pSrc->GetOffsetToStreamOutputSuffix() == pDst->GetOffsetToStreamOutputSuffix() && pSrc->IsBloatedConstantBuffer() == pDst->IsBloatedConstantBuffer())) { // Neither resource should be suballocated, so no offset adjustment required // We can do a straight resource copy from heap to heap #if DBG auto& DstPlacement = pDst->GetSubresourcePlacement(0); auto& SrcPlacement = pSrc->GetSubresourcePlacement(0); assert(SrcPlacement.Offset == 0 && DstPlacement.Offset == 0 && SrcPlacement.Footprint.RowPitch == DstPlacement.Footprint.RowPitch); #endif auto pAPIDst = pDst->GetUnderlyingResource(); auto pAPISrc = pSrc->GetUnderlyingResource(); GetGraphicsCommandList()->CopyResource(pAPIDst, pAPISrc); } else { #ifdef USE_PIX PIXScopedEvent(GetGraphicsCommandList(), 0ull, L"Whole-resource copy"); #endif for (UINT Subresource = 0; Subresource < pSrc->NumSubresources(); ++Subresource) { CopyAndConvertSubresourceRegion(pDst, Subresource, pSrc, Subresource, 0, 0, 0, nullptr); } } } PostCopy(pSrc, 0, pDst, 0, pSrc->NumSubresources()); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::ResourceResolveSubresource(Resource* pDst, UINT DstSubresource, Resource* pSrc, UINT SrcSubresource, DXGI_FORMAT Format ) { PreRender(COMMAND_LIST_TYPE::GRAPHICS); assert(pDst->m_Identity->m_bOwnsUnderlyingResource); assert(pSrc->m_Identity->m_bOwnsUnderlyingResource); assert(pSrc->SubresourceMultiplier() == pDst->SubresourceMultiplier()); // Originally this would loop based on SubResourceMultiplier, allowing multiple planes to be resolved. // In practice, this was really only hit by depth+stencil formats, but we can only resolve the depth portion // since resolving the S8_UINT bit using averaging isn't valid. 
// Input subresources are plane-extended, except when dealing with depth+stencil UINT CurSrcSub = pSrc->GetExtendedSubresourceIndex(SrcSubresource, 0); UINT CurDstSub = pDst->GetExtendedSubresourceIndex(DstSubresource, 0); m_ResourceStateManager.TransitionSubresource(pDst, CurDstSub, D3D12_RESOURCE_STATE_RESOLVE_DEST); m_ResourceStateManager.TransitionSubresource(pSrc, CurSrcSub, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); m_ResourceStateManager.ApplyAllResourceTransitions(); auto pAPIDst = pDst->GetUnderlyingResource(); auto pAPISrc = pSrc->GetUnderlyingResource(); // ref for formats supporting MSAA resolve https://learn.microsoft.com/en-us/windows/win32/direct3ddxgi/format-support-for-direct3d-11-1-feature-level-hardware switch(Format) { // Can't resolve due to stencil UINT. Claim it's typeless and just resolve the depth plane case DXGI_FORMAT_D24_UNORM_S8_UINT: Format = DXGI_FORMAT_R24_UNORM_X8_TYPELESS; break; case DXGI_FORMAT_D32_FLOAT_S8X24_UINT: Format = DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; break; // Can't resolve this particular flavor of depth format. 
Claim it's R16_UNORM instead case DXGI_FORMAT_D16_UNORM: Format = DXGI_FORMAT_R16_UNORM; break; } GetGraphicsCommandList()->ResolveSubresource(pAPIDst, CurDstSub, pAPISrc, CurSrcSub, Format); PostRender(COMMAND_LIST_TYPE::GRAPHICS); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::SetResourceMinLOD(Resource* pResource, FLOAT MinLOD ) { pResource->SetMinLOD(MinLOD); ++(pResource->m_SRVUniqueness); // Mark all potential SRV bind points as dirty CResourceBindings& bindingState = pResource->m_currentBindings; for (auto pCur = bindingState.m_ShaderResourceViewList.Flink; pCur != &bindingState.m_ShaderResourceViewList; pCur = pCur->Flink) { auto& viewBindings = *CONTAINING_RECORD(pCur, CViewBindings, m_ViewBindingList); for (UINT stage = 0; stage < ShaderStageCount; ++stage) { auto& stageState = m_CurrentState.GetStageState((EShaderStage)stage); stageState.m_SRVs.SetDirtyBits(viewBindings.m_BindPoints[stage]); } } } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::CopyStructureCount(Resource* pDstResource, UINT DstAlignedByteOffset, UAV *pUAV) { #ifdef USE_PIX PIXScopedEvent(GetGraphicsCommandList(), 0ull, L"CopyStructureCount"); #endif PreRender(COMMAND_LIST_TYPE::GRAPHICS); pUAV->UsedInCommandList(COMMAND_LIST_TYPE::GRAPHICS, GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS)); m_ResourceStateManager.TransitionResource(pDstResource, D3D12_RESOURCE_STATE_COPY_DEST); m_ResourceStateManager.ApplyAllResourceTransitions(); auto pAPIDst = pDstResource->GetUnderlyingResource(); UINT BufferOffset = DstAlignedByteOffset + GetDynamicBufferOffset(pDstResource); pUAV->CopyCounterToBuffer(pAPIDst, BufferOffset); PostCopy(nullptr, 0, pDstResource, 0, pDstResource->NumSubresources()); } 
//---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::ResourceCopyRegion(Resource* pDst, UINT DstSubresource, UINT DstX, UINT DstY, UINT DstZ, Resource* pSrc, UINT SrcSubresource, const D3D12_BOX* pSrcBox) { PreRender(COMMAND_LIST_TYPE::GRAPHICS); assert(pSrc->SubresourceMultiplier() == pDst->SubresourceMultiplier()); UINT SubresourceMultiplier = pSrc->SubresourceMultiplier(); for (UINT i = 0; i < SubresourceMultiplier; ++i) { // Input subresources are plane-extended, except when dealing with depth+stencil UINT CurSrcSub = pSrc->GetExtendedSubresourceIndex(SrcSubresource, i); UINT CurDstSub = pDst->GetExtendedSubresourceIndex(DstSubresource, i); if (Resource::IsSameUnderlyingSubresource(pSrc, CurSrcSub, pDst, CurDstSub)) { SameResourceCopy(pDst, CurDstSub, pSrc, CurSrcSub, DstX, DstY, DstZ, pSrcBox); } else { m_ResourceStateManager.TransitionSubresource(pDst, CurDstSub, D3D12_RESOURCE_STATE_COPY_DEST); m_ResourceStateManager.TransitionSubresource(pSrc, CurSrcSub, D3D12_RESOURCE_STATE_COPY_SOURCE); m_ResourceStateManager.ApplyAllResourceTransitions(); CopyAndConvertSubresourceRegion(pDst, CurDstSub, pSrc, CurSrcSub, DstX, DstY, DstZ, pSrcBox); } PostCopy(pSrc, CurSrcSub, pDst, CurDstSub, 1); } } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::UploadDataToMappedBuffer(_In_reads_bytes_(Placement.Depth * DepthPitch) const void* pData, UINT SrcPitch, UINT SrcDepth, _Out_writes_bytes_(Placement.Depth * DepthPitch) void* pMappedData, D3D12_SUBRESOURCE_FOOTPRINT& Placement, UINT DepthPitch, UINT TightRowPitch) noexcept { bool bPlanar = !!CD3D11FormatHelper::Planar(Placement.Format); UINT NumRows = bPlanar ? 
Placement.Height : Placement.Height / CD3D11FormatHelper::GetHeightAlignment(Placement.Format); ASSUME(TightRowPitch <= DepthPitch); ASSUME(TightRowPitch <= Placement.RowPitch); ASSUME(NumRows <= Placement.Height); ASSUME(Placement.RowPitch * NumRows <= DepthPitch); // Fast-path: app gave us aligned memory if ((Placement.RowPitch == SrcPitch || Placement.Height == 1) && (DepthPitch == SrcDepth || Placement.Depth == 1)) { // Allow last row to be non-padded UINT CopySize = DepthPitch * (Placement.Depth - 1) + Placement.RowPitch * (NumRows - 1) + TightRowPitch; memcpy(pMappedData, pData, CopySize); } else { // Slow path: row-by-row memcpy D3D12_MEMCPY_DEST Dest = { pMappedData, Placement.RowPitch, DepthPitch }; D3D12_SUBRESOURCE_DATA Src = { pData, (LONG_PTR)SrcPitch, (LONG_PTR)SrcDepth }; MemcpySubresource(&Dest, &Src, TightRowPitch, NumRows, Placement.Depth); } } //---------------------------------------------------------------------------------------------------------------------------------- template void DeInterleaving2DCopy( _In_reads_(_Inexpressible_(sizeof(TInterleaved) * Width + SrcRowPitch * (Height - 1))) const BYTE* pSrcData, UINT SrcRowPitch, _Out_writes_(_Inexpressible_(sizeof(TPlanar) * Width + DstRowPitch * (Height - 1))) BYTE* pDstData, UINT DstRowPitch, UINT Width, UINT Height) { static_assert(sizeof(TInterleaved) >= sizeof(TPlanar), "Invalid types used for interleaving copy."); for (UINT y = 0; y < Height; ++y) { const TInterleaved* pSrcRow = reinterpret_cast(pSrcData + SrcRowPitch * y); TPlanar* pDstRow = reinterpret_cast(pDstData + DstRowPitch * y); for (UINT x = 0; x < Width; ++x) { pDstRow[x] = static_cast((pSrcRow[x] & Mask) >> Shift); } } } template void Interleaving2DCopy( _In_reads_(_Inexpressible_(sizeof(TPlanar) * Width + SrcRowPitch * (Height - 1))) const BYTE* pSrcData, UINT SrcRowPitch, _Out_writes_(_Inexpressible_(sizeof(TInterleaved) * Width + DstRowPitch * (Height - 1))) BYTE* pDstData, UINT DstRowPitch, UINT Width, UINT Height) { 
static_assert(sizeof(TInterleaved) >= sizeof(TPlanar), "Invalid types used for interleaving copy.");

    for (UINT y = 0; y < Height; ++y)
    {
        const TPlanar* pSrcRow = reinterpret_cast(pSrcData + SrcRowPitch * y);
        TInterleaved* pDstRow = reinterpret_cast(pDstData + DstRowPitch * y);
        for (UINT x = 0; x < Width; ++x)
        {
            // OR the shifted plane bits into the destination, keeping whatever bits are already there.
            pDstRow[x] |= (static_cast(pSrcRow[x] & Mask) << Shift);
        }
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Dispatches a de-interleaving copy for one plane (PlaneIndex 0 or 1) of a depth-stencil format.
// Only R24G8_TYPELESS and R32G8X24_TYPELESS parents are handled.
// NOTE(review): the two branches of each if/else read identically here; the explicit template
// arguments that would distinguish the planes are not visible in this view - confirm against
// the original source.
void DepthStencilDeInterleavingUpload(DXGI_FORMAT ParentFormat, UINT PlaneIndex, const BYTE* pSrcData, UINT SrcRowPitch, BYTE* pDstData, UINT DstRowPitch, UINT Width, UINT Height)
{
    ASSUME(PlaneIndex == 0 || PlaneIndex == 1);
    switch (ParentFormat)
    {
        case DXGI_FORMAT_R24G8_TYPELESS:
        {
            if (PlaneIndex == 0)
                DeInterleaving2DCopy(pSrcData, SrcRowPitch, pDstData, DstRowPitch, Width, Height);
            else
                DeInterleaving2DCopy(pSrcData, SrcRowPitch, pDstData, DstRowPitch, Width, Height);
        } break;
        case DXGI_FORMAT_R32G8X24_TYPELESS:
        {
            if (PlaneIndex == 0)
                DeInterleaving2DCopy(pSrcData, SrcRowPitch, pDstData, DstRowPitch, Width, Height);
            else
                DeInterleaving2DCopy(pSrcData, SrcRowPitch, pDstData, DstRowPitch, Width, Height);
        } break;
        default: ASSUME(false);
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Counterpart of DepthStencilDeInterleavingUpload: dispatches an interleaving copy that merges
// one plane (PlaneIndex 0 or 1) back into interleaved depth-stencil data.
// NOTE(review): as above, the explicit template arguments are not visible in this view.
void DepthStencilInterleavingReadback(DXGI_FORMAT ParentFormat, UINT PlaneIndex, const BYTE* pSrcData, UINT SrcRowPitch, BYTE* pDstData, UINT DstRowPitch, UINT Width, UINT Height)
{
    ASSUME(PlaneIndex == 0 || PlaneIndex == 1);
    switch (ParentFormat)
    {
        case DXGI_FORMAT_R24G8_TYPELESS:
        {
            if (PlaneIndex == 0)
                Interleaving2DCopy(pSrcData, SrcRowPitch, pDstData, DstRowPitch, Width, Height);
            else
                Interleaving2DCopy(pSrcData, SrcRowPitch, pDstData, DstRowPitch, Width, Height);
        } break;
        case DXGI_FORMAT_R32G8X24_TYPELESS:
        {
            if (PlaneIndex == 0)
                Interleaving2DCopy(pSrcData, SrcRowPitch, pDstData, DstRowPitch, Width, Height);
            else
                Interleaving2DCopy(pSrcData, SrcRowPitch, pDstData, DstRowPitch, Width, Height);
        } break;
        default: ASSUME(false);
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Swaps the two 10-bit fields at bits 0-9 and bits 20-29 of a packed 32-bit pixel, leaving
// bits 10-19 and bits 30-31 untouched.
inline UINT Swap10bitRBPixel(UINT pixel)
{
    constexpr UINT alphaMask = 3u << 30u;
    constexpr UINT blueMask = 0x3FFu << 20u;
    constexpr UINT greenMask = 0x3FFu << 10u;
    constexpr UINT redMask = 0x3FFu;
    return (pixel & (alphaMask | greenMask)) |
        ((pixel & blueMask) >> 20u) |
        ((pixel & redMask) << 20u);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Copies a Width x Height x Depth block of pixels from source to destination, applying
// Swap10bitRBPixel to each one. Row and depth pitches are byte strides.
inline void Swap10bitRBUpload(const BYTE* pSrcData, UINT SrcRowPitch, UINT SrcDepthPitch,
                              BYTE* pDstData, UINT DstRowPitch, UINT DstDepthPitch,
                              UINT Width, UINT Height, UINT Depth)
{
    for (UINT z = 0; z < Depth; ++z)
    {
        auto pSrcSlice = pSrcData + SrcDepthPitch * z;
        auto pDstSlice = pDstData + DstDepthPitch * z;
        for (UINT y = 0; y < Height; ++y)
        {
            auto pSrcRow = pSrcSlice + SrcRowPitch * y;
            auto pDstRow = pDstSlice + DstRowPitch * y;
            for (UINT x = 0; x < Width; ++x)
            {
                reinterpret_cast(pDstRow)[x] = Swap10bitRBPixel(reinterpret_cast(pSrcRow)[x]);
            }
        }
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Records the GPU copy from the temporary upload allocation described by PreparedStorage into
// pDst, then releases that allocation against the current graphics command list.
//
//   pDst                - destination resource.
//   PreparedStorage     - encoded upload block, subresource subset, destination offsets and flags.
//   LocalPlacementDescs - optional per-subresource footprints; when null the destination's own
//                         subresource placements are used.
_Use_decl_annotations_
void ImmediateContext::FinalizeUpdateSubresources(Resource* pDst, PreparedUpdateSubresourcesOperation const& PreparedStorage, D3D12_PLACED_SUBRESOURCE_FOOTPRINT const* LocalPlacementDescs)
{
#ifdef USE_PIX
    PIXScopedEvent(GetGraphicsCommandList(), 0ull, L"UpdateSubresource on GPU timeline");
#endif
    bool bUseLocalPlacement = LocalPlacementDescs != nullptr;

    const UINT8 PlaneCount = (pDst->SubresourceMultiplier() * pDst->AppDesc()->NonOpaquePlaneCount());

    D3D12ResourceSuballocation mappableResource = PreparedStorage.EncodedBlock.Decode();
    CViewSubresourceSubset SubresourceIteration(PreparedStorage.EncodedSubresourceSubset, pDst->AppDesc()->MipLevels(), pDst->AppDesc()->ArraySize(), PlaneCount);

    // Copy contents over from the temporary upload heap
    ID3D12GraphicsCommandList *pGraphicsCommandList = GetGraphicsCommandList();

    m_ResourceStateManager.TransitionSubresources(pDst, SubresourceIteration, D3D12_RESOURCE_STATE_COPY_DEST);
    m_ResourceStateManager.ApplyAllResourceTransitions();

    // The copy loop is wrapped in a lambda so it can run either inside or outside a
    // CDisablePredication scope (see below).
    auto DoFinalize = [&]()
    {
        // PlacementIdx indexes LocalPlacementDescs and advances once per subresource visited.
        UINT PlacementIdx = 0;
        for (const auto& it : SubresourceIteration)
        {
            for (UINT Subresource = it.first; Subresource < it.second; ++Subresource, ++PlacementIdx)
            {
                auto& Placement = bUseLocalPlacement ? LocalPlacementDescs[PlacementIdx] : pDst->GetSubresourcePlacement(Subresource);
                D3D12_BOX srcBox = { 0, 0, 0, Placement.Footprint.Width, Placement.Footprint.Height, Placement.Footprint.Depth };
                if (pDst->AppDesc()->ResourceDimension() == D3D12_RESOURCE_DIMENSION_BUFFER)
                {
                    // Buffers: plain byte-range copy; Footprint.Width is used as the byte count.
                    ASSUME(Placement.Footprint.Height == 1 && Placement.Footprint.Depth == 1);
                    UINT64 srcOffset = mappableResource.GetOffset();
                    UINT64 dstOffset = pDst->GetSubresourcePlacement(0).Offset + (PreparedStorage.bDstBoxPresent ? PreparedStorage.DstX : 0);
                    pGraphicsCommandList->CopyBufferRegion(pDst->GetUnderlyingResource(), dstOffset, mappableResource.GetResource(), srcOffset, Placement.Footprint.Width);
                }
                else
                {
                    D3D12_TEXTURE_COPY_LOCATION SrcDesc = mappableResource.GetCopyLocation(Placement);
                    SrcDesc.PlacedFootprint.Offset -= PreparedStorage.OffsetAdjustment;
                    // Destinations that do not own their underlying resource, or are placed textures,
                    // are addressed by placed footprint; others by subresource index.
                    bool bDstPlacedTexture = !pDst->m_Identity->m_bOwnsUnderlyingResource || pDst->m_Identity->m_bPlacedTexture;
                    D3D12_TEXTURE_COPY_LOCATION DstDesc = bDstPlacedTexture ?
                        CD3DX12_TEXTURE_COPY_LOCATION(pDst->GetUnderlyingResource(), pDst->GetSubresourcePlacement(Subresource)) :
                        CD3DX12_TEXTURE_COPY_LOCATION(pDst->GetUnderlyingResource(), Subresource);
                    pGraphicsCommandList->CopyTextureRegion(&DstDesc, PreparedStorage.DstX, PreparedStorage.DstY, PreparedStorage.DstZ, &SrcDesc, &srcBox);
                }
            }
        }
    };

    if (PreparedStorage.bDisablePredication)
    {
        // Keep the CDisablePredication object alive for the duration of the copies.
        CDisablePredication DisablePredication(this);
        DoFinalize();
    }
    else
    {
        DoFinalize();
    }

    AdditionalCommandsAdded(COMMAND_LIST_TYPE::GRAPHICS);
    PostUpload();

    ReleaseSuballocatedHeap(AllocatorHeapType::Upload, mappableResource, GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS), COMMAND_LIST_TYPE::GRAPHICS);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Performs the CPU-side part of an UpdateSubresource-style operation in the constructor:
// computes placements and size, sets up the mappable resource, uploads the data (or clear
// pattern) into it, and, when a follow-up GPU copy is needed, writes the output parameters.
// Returns early, doing nothing further, when the destination box is empty.
ImmediateContext::CPrepareUpdateSubresourcesHelper::CPrepareUpdateSubresourcesHelper(
    Resource& Dst,
    CSubresourceSubset const& Subresources,
    const D3D11_SUBRESOURCE_DATA* pSrcData,
    const D3D12_BOX* pDstBox,
    UpdateSubresourcesFlags flags,
    const void* pClearPattern,
    UINT ClearPatternSize,
    ImmediateContext& ImmCtx)
    : Dst(Dst)
    , Subresources(Subresources)
    , bDstBoxPresent(pDstBox != nullptr)
{
#ifdef USE_PIX
    PIXScopedEvent(0ull, L"UpdateSubresource on CPU timeline");
#endif
#if DBG
    AssertPreconditions(pSrcData, pClearPattern);
#endif
    bool bEmptyBox = InitializePlacementsAndCalculateSize(pDstBox, ImmCtx);
    if (bEmptyBox)
    {
        return;
    }

    InitializeMappableResource(flags, ImmCtx, pDstBox);
    FinalizeNeeded = CachedNeedsTemporaryUploadHeap;

    UploadDataToMappableResource(pSrcData, ImmCtx, pDstBox, pClearPattern, ClearPatternSize, flags);

    if (FinalizeNeeded)
    {
        WriteOutputParameters(pDstBox, flags);
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
#if DBG
// Debug-only validation of the helper's inputs; compiled only when DBG is set.
void ImmediateContext::CPrepareUpdateSubresourcesHelper::AssertPreconditions(const D3D11_SUBRESOURCE_DATA* pSrcData, const void* pClearPattern)
{
    //
    // Currently only handles initial data and UpdateSubresource-type operations
    // This means: 1 plane, 1 legacy subresource, or all subresources with no box
    assert(NumSrcData == 1U || (NumSrcData == static_cast(Dst.AppDesc()->MipLevels() * Dst.AppDesc()->ArraySize()) && !bDstBoxPresent && !pClearPattern));
    assert(NumDstSubresources == 1U || NumDstSubresources == Dst.SubresourceMultiplier() || (NumDstSubresources == Dst.NumSubresources() && !bDstBoxPresent && !pClearPattern));

    // This routine accepts either a clear color (one pixel worth of data) or a SUBRESOURCE_DATA struct (minimum one row of data)
    assert(!(pClearPattern && pSrcData));
    ASSUME(!bUseLocalPlacement || NumDstSubresources == 1 || (NumDstSubresources == 2 && Subresources.m_EndPlane - Subresources.m_BeginPlane == 2));

    CViewSubresourceSubset SubresourceIteration(Subresources, Dst.AppDesc()->MipLevels(), Dst.AppDesc()->ArraySize(), PlaneCount);
    assert(!SubresourceIteration.IsEmpty());
}
#endif

//----------------------------------------------------------------------------------------------------------------------------------
// Computes TotalSize (bytes of intermediate storage needed) and, when local placements are in
// use, fills PreparedStorage.LocalPlacementDescs. Returns true when pDstBox is empty (nothing to
// upload), false otherwise.
bool ImmediateContext::CPrepareUpdateSubresourcesHelper::InitializePlacementsAndCalculateSize(const D3D12_BOX* pDstBox, ImmediateContext& ImmCtx)
{
    auto& LocalPlacementDescs = PreparedStorage.LocalPlacementDescs;

    // How big of an intermediate do we need?
// If we need to use local placement structs, fill those out as well if (bUseLocalPlacement) { for (UINT i = 0; i < NumDstSubresources; ++i) { auto& PlacementDesc = LocalPlacementDescs[i]; UINT Subresource = ComposeSubresourceIdxExtended(Subresources.m_BeginMip, Subresources.m_BeginArray, Subresources.m_BeginPlane + i, Dst.AppDesc()->MipLevels(), Dst.AppDesc()->ArraySize()); UINT SlicePitch; if (pDstBox) { // No-op if (pDstBox->right <= pDstBox->left || pDstBox->bottom <= pDstBox->top || pDstBox->back <= pDstBox->front) { return true; } // Note: D3D11 provides a subsampled box, so for planar formats, we need to use the plane format to avoid subsampling again Resource::FillSubresourceDesc(ImmCtx.m_pDevice12.get(), ImmCtx.GetOptions13().UnrestrictedBufferTextureCopyPitchSupported, Dst.GetSubresourcePlacement(Subresource).Footprint.Format, pDstBox->right - pDstBox->left, pDstBox->bottom - pDstBox->top, pDstBox->back - pDstBox->front, PlacementDesc); CD3D11FormatHelper::CalculateMinimumRowMajorSlicePitch(PlacementDesc.Footprint.Format, PlacementDesc.Footprint.RowPitch, PlacementDesc.Footprint.Height, SlicePitch); } else { PlacementDesc = Dst.GetSubresourcePlacement(Subresource); SlicePitch = Dst.DepthPitch(Subresource); } PlacementDesc.Offset = TotalSize; TotalSize += Align(static_cast(SlicePitch) * PlacementDesc.Footprint.Depth, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); } } else { auto& Placement = Dst.GetSubresourcePlacement(LastDstSubresource); // If the destination is suballocated, make sure to factor out the suballocated offset when calculating how // large the resource needs to be UINT64 suballocationOffset = Dst.m_Identity->m_bOwnsUnderlyingResource ? 
0 : Dst.m_Identity->GetSuballocatedOffset(); TotalSize = (Placement.Offset - suballocationOffset) + static_cast(Dst.DepthPitch(LastDstSubresource)) * Placement.Footprint.Depth - (Dst.GetSubresourcePlacement(FirstDstSubresource).Offset - suballocationOffset); } return false; } //---------------------------------------------------------------------------------------------------------------------------------- // Only respect predication in response to an actual UpdateSubresource (or similar) API call. // Internal uses of UpdateSubresource, as well as initial data, should ignore predication. // Batched update operations cannot even query predication and must assume a copy must be used. bool ImmediateContext::CPrepareUpdateSubresourcesHelper::NeedToRespectPredication(UpdateSubresourcesFlags flags) const { return (flags & UpdateSubresourcesFlags::ScenarioMask) == UpdateSubresourcesFlags::ScenarioImmediateContext; } //---------------------------------------------------------------------------------------------------------------------------------- bool ImmediateContext::CPrepareUpdateSubresourcesHelper::NeedTemporaryUploadHeap(UpdateSubresourcesFlags flags , ImmediateContext& ImmCtx) const { UpdateSubresourcesFlags scenario = (flags & UpdateSubresourcesFlags::ScenarioMask); bool bCanWriteDirectlyToResource = scenario != UpdateSubresourcesFlags::ScenarioBatchedContext && // If we aren't explicitly requesting a copy to a temp... (!NeedToRespectPredication(flags) || !ImmCtx.m_CurrentState.m_pPredicate) && // And we don't need to respect predication... !Dst.GetIdentity()->m_bOwnsUnderlyingResource && // And the resource came from a pool... Dst.GetAllocatorHeapType() != AllocatorHeapType::Readback; // And it's not the readback pool... if (bCanWriteDirectlyToResource && scenario != UpdateSubresourcesFlags::ScenarioInitialData) { // Check if resource is idle. 
CViewSubresourceSubset SubresourceIteration(Subresources, Dst.AppDesc()->MipLevels(), Dst.AppDesc()->ArraySize(), PlaneCount); for (auto&& range : SubresourceIteration) { for (UINT i = range.first; i < range.second; ++i) { if (!ImmCtx.SynchronizeForMap(&Dst, i, MAP_TYPE_WRITE, true)) { bCanWriteDirectlyToResource = false; break; } } if (!bCanWriteDirectlyToResource) { break; } } } // ... And it's not busy, then we can do this upload operation directly into the final destination resource. return !bCanWriteDirectlyToResource; } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::CPrepareUpdateSubresourcesHelper::InitializeMappableResource(UpdateSubresourcesFlags flags, ImmediateContext& ImmCtx, D3D12_BOX const* pDstBox) { UpdateSubresourcesFlags scenario = flags & UpdateSubresourcesFlags::ScenarioMask; CachedNeedsTemporaryUploadHeap = NeedTemporaryUploadHeap(flags, ImmCtx); if (CachedNeedsTemporaryUploadHeap) { ResourceAllocationContext threadingContext = ResourceAllocationContext::ImmediateContextThreadTemporary; if ((scenario == UpdateSubresourcesFlags::ScenarioInitialData && ImmCtx.m_CreationArgs.CreatesAndDestroysAreMultithreaded) || scenario == UpdateSubresourcesFlags::ScenarioBatchedContext) { threadingContext = ResourceAllocationContext::FreeThread; } mappableResource = ImmCtx.AcquireSuballocatedHeap(AllocatorHeapType::Upload, TotalSize, threadingContext); // throw( _com_error ) } else { if (pDstBox) { // Only DX9 managed vertex buffers and dx11 padded constant buffers hit this path, so extra complexity isn't required yet assert(Dst.Parent()->m_desc12.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER); assert(!bDeInterleavingUpload && Dst.GetSubresourcePlacement(0).Footprint.Format == DXGI_FORMAT_UNKNOWN); assert(pDstBox->top == 0 && pDstBox->front == 0); bufferOffset = pDstBox ? 
pDstBox->left : 0; } mappableResource = Dst.GetIdentity()->m_suballocation; } assert(mappableResource.IsInitialized()); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::CPrepareUpdateSubresourcesHelper::UploadSourceDataToMappableResource(void* pDstData, D3D11_SUBRESOURCE_DATA const* pSrcData, ImmediateContext& ImmCtx, UpdateSubresourcesFlags flags) { // The source data array provided is indexed by D3D11.0 subresource indices for (UINT SrcDataIdx = 0; SrcDataIdx < NumSrcData; ++SrcDataIdx) { auto& SrcData = pSrcData[SrcDataIdx]; UINT ArraySlice, MipLevel; DecomposeSubresourceIdxNonExtended(SrcDataIdx, Subresources.m_EndMip - Subresources.m_BeginMip, MipLevel, ArraySlice); const BYTE* pSrcPlaneData = reinterpret_cast(SrcData.pSysMem); // Even though the next subresource is supposed to be the next mip, planes are iterated last so that the pointer adjustment // for planar source data doesn't need to be calculated a second time for (UINT Plane = Subresources.m_BeginPlane; Plane < Subresources.m_EndPlane; ++Plane) { const UINT Subresource = ComposeSubresourceIdxExtended(MipLevel + Subresources.m_BeginMip, ArraySlice + Subresources.m_BeginArray, Plane, Dst.AppDesc()->MipLevels(), Dst.AppDesc()->ArraySize()); auto& Placement = bUseLocalPlacement ? 
PreparedStorage.LocalPlacementDescs[Plane - Subresources.m_BeginPlane] : Dst.GetSubresourcePlacement(Subresource); BYTE* pDstSubresourceData = reinterpret_cast(pDstData) + Placement.Offset - PreparedStorage.Base.OffsetAdjustment; // If writing directly into the resource, we need to account for the dstBox instead of leaving it to the GPU copy if (!CachedNeedsTemporaryUploadHeap) { pDstSubresourceData += bufferOffset; } if (bDeInterleavingUpload) { DepthStencilDeInterleavingUpload(ImmCtx.GetParentForFormat(Dst.AppDesc()->Format()), Plane, pSrcPlaneData, SrcData.SysMemPitch, pDstSubresourceData, Placement.Footprint.RowPitch, Placement.Footprint.Width, Placement.Footprint.Height); // Intentionally not advancing the src pointer, since the next copy reads from the same data } else if ((flags & UpdateSubresourcesFlags::ChannelSwapR10G10B10A2) != UpdateSubresourcesFlags::None) { Swap10bitRBUpload(pSrcPlaneData, SrcData.SysMemPitch, SrcData.SysMemSlicePitch, pDstSubresourceData, Placement.Footprint.RowPitch, Dst.DepthPitch(Subresource), Placement.Footprint.Width, Placement.Footprint.Height, Placement.Footprint.Depth); } else { // Tight row pitch is how much data to copy per row UINT TightRowPitch; CD3D11FormatHelper::CalculateMinimumRowMajorRowPitch(Placement.Footprint.Format, Placement.Footprint.Width, TightRowPitch); // Slice pitches are provided to enable fast paths which use a single memcpy UINT SrcSlicePitch = Dst.Parent()->ResourceDimension12() < D3D12_RESOURCE_DIMENSION_TEXTURE3D ? 
(SrcData.SysMemPitch * Placement.Footprint.Height) : SrcData.SysMemSlicePitch; UINT DstSlicePitch; if (bDstBoxPresent) { CD3D11FormatHelper::CalculateMinimumRowMajorSlicePitch(Placement.Footprint.Format, Placement.Footprint.RowPitch, Placement.Footprint.Height, DstSlicePitch); } else { DstSlicePitch = Dst.DepthPitch(Subresource); } ImmediateContext::UploadDataToMappedBuffer(pSrcPlaneData, SrcData.SysMemPitch, SrcSlicePitch, pDstSubresourceData, Placement.Footprint, DstSlicePitch, TightRowPitch); pSrcPlaneData += SrcData.SysMemPitch * Placement.Footprint.Height; } } } } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::CPrepareUpdateSubresourcesHelper::UploadDataToMappableResource(D3D11_SUBRESOURCE_DATA const* pSrcData, ImmediateContext& ImmCtx, D3D12_BOX const* pDstBox, const void* pClearPattern, UINT ClearPatternSize, UpdateSubresourcesFlags flags) { // Now that we have something we can upload the data to, map it void* pDstData; const D3D12_RANGE ReadRange = {}; ThrowFailure(mappableResource.Map(0, &ReadRange, &pDstData)); // throw( _com_error ) // Now, upload the data for each subresource // The offset adjustment is subtracted from the offset of the given subresource, to calculate a location to write to // If we are doing UpdateSubresource on subresources 3 and 4, the offset to write to for subresource 4 is (offset of 4 - offset of 3) auto& FirstSubresourcePlacement = bUseLocalPlacement ? 
PreparedStorage.LocalPlacementDescs[0] : Dst.GetSubresourcePlacement(FirstDstSubresource); PreparedStorage.Base.OffsetAdjustment = FirstSubresourcePlacement.Offset; if (pSrcData != nullptr) { UploadSourceDataToMappableResource(pDstData, pSrcData, ImmCtx, flags); } else { // Just zero/fill the memory assert(TotalSize < size_t(-1)); UINT64 CopySize = TotalSize; // If writing directly into the resource, we need to account for the dstBox instead of leaving it to the GPU copy if (!CachedNeedsTemporaryUploadHeap && pDstBox) { CopySize = min(CopySize, pDstBox->right - pDstBox->left); } if (pClearPattern) { assert(!CD3D11FormatHelper::Planar(Dst.AppDesc()->Format()) && CD3D11FormatHelper::GetBitsPerElement(Dst.AppDesc()->Format()) % 8 == 0); assert(NumDstSubresources == 1); // What we're clearing here may not be one pixel, so intentionally using GetByteAlignment to determine the minimum size // for a fully aligned block of pixels. (E.g. YUY2 is 8 bits per element * 2 elements per pixel * 2 pixel subsampling = 32 bits of clear data). const UINT SizeOfClearPattern = ClearPatternSize != 0 ? 
ClearPatternSize : CD3D11FormatHelper::GetByteAlignment(Dst.AppDesc()->Format()); UINT ClearByteIndex = 0; auto generator = [&]() { auto result = *(reinterpret_cast(pClearPattern) + ClearByteIndex); ClearByteIndex = (ClearByteIndex + 1) % SizeOfClearPattern; return result; }; if (FirstSubresourcePlacement.Footprint.RowPitch % SizeOfClearPattern != 0) { UINT SlicePitch; CD3D11FormatHelper::CalculateMinimumRowMajorSlicePitch( FirstSubresourcePlacement.Footprint.Format, FirstSubresourcePlacement.Footprint.RowPitch, FirstSubresourcePlacement.Footprint.Height, SlicePitch); // We need to make sure to leave a gap in the pattern so that it starts on byte 0 for every row for (UINT z = 0; z < FirstSubresourcePlacement.Footprint.Depth; ++z) { for (UINT y = 0; y < FirstSubresourcePlacement.Footprint.Height; ++y) { BYTE* pDstRow = (BYTE*)pDstData + bufferOffset + FirstSubresourcePlacement.Footprint.RowPitch * y + SlicePitch * z; ClearByteIndex = 0; std::generate_n(pDstRow, FirstSubresourcePlacement.Footprint.RowPitch, generator); } } } else { std::generate_n((BYTE*)pDstData + bufferOffset, CopySize, generator); } } else { ZeroMemory((BYTE *)pDstData + bufferOffset, static_cast(CopySize)); } } CD3DX12_RANGE WrittenRange(0, SIZE_T(TotalSize)); mappableResource.Unmap(0, &WrittenRange); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::CPrepareUpdateSubresourcesHelper::WriteOutputParameters(D3D12_BOX const* pDstBox, UpdateSubresourcesFlags flags) { // Write output parameters UpdateSubresourcesFlags scenario = flags & UpdateSubresourcesFlags::ScenarioMask; if (pDstBox) { PreparedStorage.Base.DstX = pDstBox->left; PreparedStorage.Base.DstY = pDstBox->top; PreparedStorage.Base.DstZ = pDstBox->front; } else { PreparedStorage.Base.DstX = 0; PreparedStorage.Base.DstY = 0; PreparedStorage.Base.DstZ = 0; } PreparedStorage.Base.EncodedBlock = 
EncodedResourceSuballocation(mappableResource); PreparedStorage.Base.EncodedSubresourceSubset = Subresources; PreparedStorage.Base.bDisablePredication = (scenario == UpdateSubresourcesFlags::ScenarioInitialData || scenario == UpdateSubresourcesFlags::ScenarioImmediateContextInternalOp); PreparedStorage.Base.bDstBoxPresent = bDstBoxPresent; } //---------------------------------------------------------------------------------------------------------------------------------- _Use_decl_annotations_ void ImmediateContext::UpdateSubresources(Resource* pDst, D3D12TranslationLayer::CSubresourceSubset const& Subresources, const D3D11_SUBRESOURCE_DATA* pSrcData, const D3D12_BOX* pDstBox, UpdateSubresourcesFlags flags, const void* pClearColor ) { CPrepareUpdateSubresourcesHelper PrepareHelper(*pDst, Subresources, pSrcData, pDstBox, flags, pClearColor, 0, *this); if (PrepareHelper.FinalizeNeeded) { FinalizeUpdateSubresources(pDst, PrepareHelper.PreparedStorage.Base, PrepareHelper.bUseLocalPlacement ? 
PrepareHelper.PreparedStorage.LocalPlacementDescs : nullptr); } } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::ResourceUpdateSubresourceUP(Resource* pResource, UINT DstSubresource, _In_opt_ const D3D12_BOX* pDstBox, _In_ const VOID* pMem, UINT SrcPitch, UINT SrcDepth) { PreRender(COMMAND_LIST_TYPE::GRAPHICS); D3D11_SUBRESOURCE_DATA SubresourceDesc = { pMem, SrcPitch, SrcDepth }; UINT8 MipLevel, PlaneSlice; UINT16 ArraySlice; DecomposeSubresourceIdxExtended(DstSubresource, pResource->AppDesc()->MipLevels(), pResource->AppDesc()->ArraySize(), MipLevel, ArraySlice, PlaneSlice); UpdateSubresources(pResource, CSubresourceSubset(1, 1, pResource->SubresourceMultiplier(), MipLevel, ArraySlice, PlaneSlice), &SubresourceDesc, pDstBox); } //---------------------------------------------------------------------------------------------------------------------------------- // Calculate either a new coordinate in the same subresource, or targeting a new subresource with the number of tiles remaining inline void CalcNewTileCoords(D3D12_TILED_RESOURCE_COORDINATE &Coord, UINT &NumTiles, D3D12_SUBRESOURCE_TILING const& SubresourceTiling) { CalcNewTileCoords(reinterpret_cast(Coord), NumTiles, reinterpret_cast(SubresourceTiling)); } COMMAND_LIST_TYPE ImmediateContext::GetFallbackCommandListType(UINT commandListTypeMask) { static_assert(static_cast(COMMAND_LIST_TYPE::MAX_VALID) == 3u, "ImmediateContext::GetFallbackCommandListType must support all command list types."); COMMAND_LIST_TYPE fallbackList[] = { COMMAND_LIST_TYPE::GRAPHICS, COMMAND_LIST_TYPE::VIDEO_DECODE, COMMAND_LIST_TYPE::VIDEO_PROCESS, }; for (auto type : fallbackList) { if (commandListTypeMask & (1 << (UINT)type)) { return type; } } return COMMAND_LIST_TYPE::GRAPHICS; } ImmediateContext::ArchitectureFlags ImmediateContext::QueryArchitectureFlags() { D3D12_FEATURE_DATA_ARCHITECTURE1 data; 
data.NodeIndex = m_nodeIndex; CheckFeatureSupport(D3D12_FEATURE_ARCHITECTURE1, &data, sizeof(data)); ArchitectureFlags flags; flags.isTileBasedRenderer = data.TileBasedRenderer; flags.isUMA = data.UMA; flags.iscacheCoherentUMA = data.CacheCoherentUMA; flags.isIsolatedMMU = data.IsolatedMMU; return flags; } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::UpdateTileMappings( Resource* pResource, UINT NumTiledResourceRegions, _In_reads_(NumTiledResourceRegions) const D3D12_TILED_RESOURCE_COORDINATE* pTiledResourceRegionStartCoords, _In_reads_opt_(NumTiledResourceRegions) const D3D12_TILE_REGION_SIZE* pTiledResourceRegionSizes, Resource* pTilePool, UINT NumRanges, _In_reads_opt_(NumRanges) const TILE_RANGE_FLAG* pRangeFlags, _In_reads_opt_(NumRanges) const UINT* pTilePoolStartOffsets, _In_reads_opt_(NumRanges) const UINT* pRangeTileCounts, TILE_MAPPING_FLAG Flags) { bool NeedToSubmit = true; UINT commandListTypeMask = pResource->GetCommandListTypeMask(); if (commandListTypeMask == COMMAND_LIST_TYPE_UNKNOWN_MASK) { commandListTypeMask = COMMAND_LIST_TYPE_GRAPHICS_MASK; // fallback to graphics NeedToSubmit = false; } // if we have subresources appearing in different command list types, we need to synchronize to a target command list and then submit the operation on the target one. 
COMMAND_LIST_TYPE targetListType = GetFallbackCommandListType(commandListTypeMask); UINT targetListMask = 1 << (UINT)targetListType; if (!IsSingleCommandListType(commandListTypeMask)) { for (UINT subresource = 0; subresource < pResource->NumSubresources(); subresource++) { if (pResource->GetCommandListTypeMask(subresource) != targetListMask) { m_ResourceStateManager.TransitionSubresource(pResource, subresource, D3D12_RESOURCE_STATE_COMMON, targetListType, SubresourceTransitionFlags::ForceExclusiveState); } } m_ResourceStateManager.ApplyAllResourceTransitions(); } UpdateTileMappingsImpl(targetListType, pResource, NumTiledResourceRegions, pTiledResourceRegionStartCoords, pTiledResourceRegionSizes, pTilePool, NumRanges, pRangeFlags, pTilePoolStartOffsets, pRangeTileCounts, Flags, NeedToSubmit); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::UpdateTileMappingsImpl( COMMAND_LIST_TYPE commandListType, Resource* pResource, UINT NumTiledResourceRegions, _In_reads_(NumTiledResourceRegions) const D3D12_TILED_RESOURCE_COORDINATE* pTiledResourceRegionStartCoords, _In_reads_opt_(NumTiledResourceRegions) const D3D12_TILE_REGION_SIZE* pTiledResourceRegionSizes, Resource* pTilePool, UINT NumRanges, _In_reads_opt_(NumRanges) const TILE_RANGE_FLAG* pRangeFlags, _In_reads_opt_(NumRanges) const UINT* pTilePoolStartOffsets, _In_reads_opt_(NumRanges) const UINT* pRangeTileCounts, TILE_MAPPING_FLAG Flags, bool NeedToSubmit) { // Helper methods auto pfnGetAllocationForTile = [pTilePool](UINT Tile) -> Resource::STilePoolAllocation& { const UINT BytesPerTile = D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; const UINT ByteOffset = Tile * BytesPerTile; UINT CurrBytes = 0; for (auto& Allocation : pTilePool->m_TilePool.m_Allocations) { CurrBytes += Allocation.m_Size; if (ByteOffset < CurrBytes) return Allocation; } ASSUME(false); }; // This code still honors the D3D11 tiled resource tier 1 
restriction, but does so only on D3D12 resource heap tier 2 or above. // Yes, they are related; but they aren't yet fully teased apart. // The tiled resource tier 1 restriction: one physical page cannot be mapped to buffer & texture simulatenously. // D3D12 resource heap tier 1 precludes one physical page from being mapped to three types of resources simulatenously. assert(m_caps.ResourceHeapTier >= D3D12_RESOURCE_HEAP_TIER_2 ); const bool bTier1 = m_caps.TiledResourcesTier == D3D12_TILED_RESOURCES_TIER_1; const bool bTexture = bTier1 && pResource->Parent()->ResourceDimension12() != D3D12_RESOURCE_DIMENSION_BUFFER; auto pfnGetHeapForAllocation = [=](Resource::STilePoolAllocation& Allocation) -> ID3D12Heap* { auto& spHeap = bTexture ? Allocation.m_spUnderlyingTextureHeap : Allocation.m_spUnderlyingBufferHeap; if (!spHeap) { CD3DX12_HEAP_DESC Desc(Allocation.m_Size, GetHeapProperties(D3D12_HEAP_TYPE_DEFAULT)); HRESULT hr = m_pDevice12->CreateHeap( &Desc, IID_PPV_ARGS(&spHeap) ); ThrowFailure(hr); } return spHeap.get(); }; if (NeedToSubmit && (Flags & TILE_MAPPING_NO_OVERWRITE) == 0 && HasCommands(commandListType)) { SubmitCommandList(commandListType); // throws } pResource->UsedInCommandList(commandListType, GetCommandListID(commandListType)); GetCommandListManager(commandListType)->ExecuteCommandQueueCommand([&]() { UINT NumStandardMips = pResource->m_TiledResource.m_NumStandardMips; UINT NumTilesRequired = pResource->m_TiledResource.m_NumTilesForResource; bool bPackedMips = NumStandardMips != pResource->AppDesc()->MipLevels(); if (pTilePool) { if (pTilePool != pResource->m_TiledResource.m_pTilePool && pResource->m_TiledResource.m_pTilePool != nullptr) { // Unmap all tiles from the old tile pool static const D3D12_TILE_RANGE_FLAGS NullFlag = D3D12_TILE_RANGE_FLAG_NULL; static const D3D12_TILED_RESOURCE_COORDINATE StartCoords = {}; const D3D12_TILE_REGION_SIZE FullResourceSize = {NumTilesRequired}; GetCommandQueue(commandListType)->UpdateTileMappings( 
pResource->GetUnderlyingResource(), 1, // Number of regions &StartCoords, &FullResourceSize, nullptr, // Tile pool (can be null when unbinding) 1, // Number of ranges &NullFlag, nullptr, // Tile pool start (ignored when flag is null) &NumTilesRequired, D3D12_TILE_MAPPING_FLAGS( Flags )); } pResource->m_TiledResource.m_pTilePool = pTilePool; } // UpdateTileMappings is 1:1 with D3D12 if the tile pool has never been grown, // OR if the entire region of tiles comes from the first allocation (heap) // First: Does the entire range fit into one allocation (or does it not need any allocation references)? // Trivially true if we don't have a tile pool, or if the tile pool only has one allocation bool bNoAllocations = !pTilePool; bool bOneOrNoAllocations = bNoAllocations || pTilePool->m_TilePool.m_Allocations.size() == 1; // Trivial check says no - we can't pass null to 12, and we can't pass the 11 parameters straight through // Now we need to figure out if the tile ranges being specified are all really from the same allocation if (!bOneOrNoAllocations) { // Assume that we won't find a reference to another allocation bOneOrNoAllocations = true; bNoAllocations = true; assert(pTilePool); UINT Allocation0NumTiles = pTilePool->m_TilePool.m_Allocations.front().m_Size / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; for (UINT range = 0; range < NumRanges; ++range) { UINT RangeFlag = pRangeFlags ? pRangeFlags[range] : 0; if (RangeFlag == 0 || RangeFlag == TILE_RANGE_REUSE_SINGLE_TILE) { // We're definitely binding a tile bNoAllocations = false; UINT BaseTile = pTilePoolStartOffsets[range]; UINT EndTile = BaseTile + ((RangeFlag == 0 && pRangeTileCounts) ? 
pRangeTileCounts[range] : 1); if (EndTile > Allocation0NumTiles) { // And it's not the first one bOneOrNoAllocations = false; break; } } } } // Now we know how for sure how to translate this to 12 // If the first or no allocations, we can pass the 11 parameters straight through // (if no allocations, we can avoid lazy instantiation of per-kind heaps for tier 1) // Otherwise, we need to split this up into multiple API invocations if (bOneOrNoAllocations) { auto pCoord = reinterpret_cast(pTiledResourceRegionStartCoords); auto pSize = reinterpret_cast(pTiledResourceRegionSizes); ID3D12Heap *pHeap = nullptr; if (!bNoAllocations) { pHeap = pfnGetHeapForAllocation( pfnGetAllocationForTile(pTilePoolStartOffsets[0])); // throw( _com_error ) } GetCommandQueue(commandListType)->UpdateTileMappings( pResource->GetUnderlyingResource(), NumTiledResourceRegions, pCoord, pSize, pHeap, NumRanges, reinterpret_cast(pRangeFlags), pTilePoolStartOffsets, pRangeTileCounts, D3D12_TILE_MAPPING_FLAGS(Flags) ); } else { assert(pTilePool); // For each resource region or tile region, submit an UpdateTileMappings op D3D12_TILED_RESOURCE_COORDINATE Coord; D3D12_TILE_REGION_SIZE Size; D3D12_TILE_RANGE_FLAGS Flag = pRangeFlags ? static_cast(pRangeFlags[0]) : D3D12_TILE_RANGE_FLAG_NONE; UINT range = 0, region = 0; UINT CurrTile = pTilePoolStartOffsets[0]; UINT NumTiles = pRangeTileCounts ? pRangeTileCounts[0] : 0xffffffff; Coord = pTiledResourceRegionStartCoords ? reinterpret_cast(pTiledResourceRegionStartCoords[0]) : D3D12_TILED_RESOURCE_COORDINATE{}; Size = pTiledResourceRegionSizes ? reinterpret_cast(pTiledResourceRegionSizes[0]) : (pTiledResourceRegionStartCoords ? 
D3D12_TILE_REGION_SIZE{1, FALSE} : D3D12_TILE_REGION_SIZE{NumTilesRequired, FALSE}); D3D12_BOX CurrentBox = {}; bool bBox = false; while(range < NumRanges && region < NumTiledResourceRegions) { // Step 1: Figure out what will determine the bounds of this particular update: the region, the range, or the heap UINT NumTilesForRegion = Size.NumTiles; UINT NumTilesForRange = NumTiles; UINT NumTilesToUpdate = min(NumTilesForRegion, NumTilesForRange); auto &Allocation = pfnGetAllocationForTile(CurrTile); // If we are dealing with multiple tiles from the pool, does the current heap have enough space for it? if (Flag == D3D12_TILE_RANGE_FLAG_NONE) { UINT NumTilesInHeap = Allocation.m_Size / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES + Allocation.m_TileOffset - CurrTile; NumTilesToUpdate = min(NumTilesToUpdate, NumTilesInHeap); } // If the app wanted to use a box, but the region was not the smallest unit here, we need to break up the region // The simplest way to do that is to break it up into 1x1 regions, so we set that up here if (NumTilesToUpdate != NumTilesForRegion && Size.UseBox) { CurrentBox = {Coord.X, Coord.Y, Coord.Z, Coord.X + Size.Width, Coord.Y + Size.Height, Coord.Z + Size.Depth}; bBox = true; Size = {1, false}; NumTilesForRegion = 1; NumTilesToUpdate = 1; } // Step 2: Actually issue the update operation (if this range isn't being skipped) if (Flag != D3D12_TILE_RANGE_FLAG_SKIP) { D3D12_TILE_REGION_SIZE APISize = {NumTilesToUpdate, FALSE}; ID3D12Heap *pHeap = Flag == D3D12_TILE_RANGE_FLAG_NULL ? 
nullptr : pfnGetHeapForAllocation(Allocation); // throw( _com_error ) UINT BaseTile = CurrTile - Allocation.m_TileOffset; GetCommandQueue(commandListType)->UpdateTileMappings( pResource->GetUnderlyingResource(), 1, &Coord, &APISize, pHeap, 1, &Flag, &BaseTile, &NumTilesToUpdate, D3D12_TILE_MAPPING_FLAGS(Flags) ); } // Step 3: Advance the iteration structs // Start with the tiled resource region if (NumTilesToUpdate == NumTilesForRegion) { // First, flow through the box bool bAdvanceRegion = !bBox; if (bBox) { ++Coord.X; if (Coord.X == CurrentBox.right) { Coord.X = CurrentBox.left; ++Coord.Y; if (Coord.Y == CurrentBox.bottom) { Coord.Y = CurrentBox.top; ++Coord.Z; if (Coord.Z == CurrentBox.back) { bBox = false; bAdvanceRegion = true; } } } } // If we don't have a box, or we finished the box, then go to the next region if (bAdvanceRegion && ++region < NumTiledResourceRegions) { assert(pTiledResourceRegionStartCoords); Coord = reinterpret_cast(pTiledResourceRegionStartCoords[region]); Size = pTiledResourceRegionSizes ? reinterpret_cast(pTiledResourceRegionSizes[region]) : D3D12_TILE_REGION_SIZE{1, FALSE}; } } else { assert(!bBox); Size.NumTiles -= NumTilesToUpdate; // Calculate a new region based on tile flow across dimensions/mips UINT TempTileCount = NumTilesToUpdate; while (TempTileCount) { if (bPackedMips && Coord.Subresource >= NumStandardMips) { Coord.Subresource = NumStandardMips; Coord.X += TempTileCount; break; } else { D3D12_SUBRESOURCE_TILING const& SubresourceTiling = pResource->m_TiledResource.m_SubresourceTiling[Coord.Subresource % pResource->AppDesc()->MipLevels()]; CalcNewTileCoords(Coord, TempTileCount, SubresourceTiling); } } } // Then the tile pool range if (NumTilesToUpdate == NumTilesForRange) { if (++range < NumRanges) { assert(pRangeTileCounts); Flag = pRangeFlags ? static_cast(pRangeFlags[range]) : D3D12_TILE_RANGE_FLAG_NONE; CurrTile = Flag == D3D12_TILE_RANGE_FLAG_NULL ? 
0 : pTilePoolStartOffsets[range]; NumTiles = pRangeTileCounts[range]; } } else { if (Flag == D3D12_TILE_RANGE_FLAG_NONE) { CurrTile += NumTilesToUpdate; } NumTiles -= NumTilesToUpdate; } } } }); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::CopyTileMappings(Resource* pDstTiledResource, _In_ const D3D12_TILED_RESOURCE_COORDINATE* pDstStartCoords, Resource* pSrcTiledResource, _In_ const D3D12_TILED_RESOURCE_COORDINATE* pSrcStartCoords, _In_ const D3D12_TILE_REGION_SIZE* pTileRegion, TILE_MAPPING_FLAG Flags) { UINT commandListTypeMask = pSrcTiledResource->GetCommandListTypeMask() | pDstTiledResource->GetCommandListTypeMask(); if (commandListTypeMask == COMMAND_LIST_TYPE_UNKNOWN_MASK) { commandListTypeMask = COMMAND_LIST_TYPE_GRAPHICS_MASK; // fallback to graphics } // if we have subresources appearing in different command list types, we need to synchronize to a target command list and then submit the operation on the target one. 
COMMAND_LIST_TYPE targetListType = GetFallbackCommandListType(commandListTypeMask);
    UINT targetListMask = 1 << (UINT)targetListType;
    if (!IsSingleCommandListType(commandListTypeMask))
    {
        for (UINT subresource = 0; subresource < pSrcTiledResource->NumSubresources(); subresource++)
        {
            if (pSrcTiledResource->GetCommandListTypeMask(subresource) != targetListMask)
            {
                m_ResourceStateManager.TransitionSubresource(pSrcTiledResource, subresource, D3D12_RESOURCE_STATE_COMMON, targetListType, SubresourceTransitionFlags::ForceExclusiveState);
            }
        }

        for (UINT subresource = 0; subresource < pDstTiledResource->NumSubresources(); subresource++)
        {
            if (pDstTiledResource->GetCommandListTypeMask(subresource) != targetListMask)
            {
                m_ResourceStateManager.TransitionSubresource(pDstTiledResource, subresource, D3D12_RESOURCE_STATE_COMMON, targetListType, SubresourceTransitionFlags::ForceExclusiveState);
            }
        }
        m_ResourceStateManager.ApplyAllResourceTransitions();
    }

    CopyTileMappingsImpl(targetListType, pDstTiledResource, pDstStartCoords, pSrcTiledResource, pSrcStartCoords, pTileRegion, Flags);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Copies tile mappings from pSrcTiledResource to pDstTiledResource on the command queue
// that backs commandListType.
//
// Unless TILE_MAPPING_NO_OVERWRITE is set, any commands already recorded on that command
// list type are submitted first. If the destination is currently bound to a different,
// non-null tile pool than the source, all of its tiles are unmapped before the destination
// adopts the source's tile pool and the mappings are copied.
void ImmediateContext::CopyTileMappingsImpl(COMMAND_LIST_TYPE commandListType, Resource* pDstTiledResource, _In_ const D3D12_TILED_RESOURCE_COORDINATE* pDstStartCoords, Resource* pSrcTiledResource, _In_ const D3D12_TILED_RESOURCE_COORDINATE* pSrcStartCoords, _In_ const D3D12_TILE_REGION_SIZE* pTileRegion, TILE_MAPPING_FLAG Flags )
{
    auto pDst = pDstTiledResource->GetUnderlyingResource();
    auto pSrc = pSrcTiledResource->GetUnderlyingResource();

    // Flush recorded work first unless the caller asked for no-overwrite semantics.
    if ((Flags & TILE_MAPPING_NO_OVERWRITE) == 0 && HasCommands(commandListType))
    {
        SubmitCommandList(commandListType); // throws
    }

    // Tag both resources with the current command list ID of this command list type.
    pDstTiledResource->UsedInCommandList(commandListType, GetCommandListID(commandListType));
    pSrcTiledResource->UsedInCommandList(commandListType, GetCommandListID(commandListType));

    GetCommandListManager(commandListType)->ExecuteCommandQueueCommand([&]()
    {
        auto pTilePool = pSrcTiledResource->m_TiledResource.m_pTilePool;
        if (pTilePool != pDstTiledResource->m_TiledResource.m_pTilePool && pDstTiledResource->m_TiledResource.m_pTilePool != nullptr)
        {
            UINT NumTilesRequired = pDstTiledResource->m_TiledResource.m_NumTilesForResource;

            // Unmap all tiles from the old tile pool
            static const D3D12_TILE_RANGE_FLAGS NullFlag = D3D12_TILE_RANGE_FLAG_NULL;
            static const D3D12_TILED_RESOURCE_COORDINATE StartCoords = {};
            const D3D12_TILE_REGION_SIZE FullResourceSize = { NumTilesRequired };
            GetCommandQueue(commandListType)->UpdateTileMappings(
                pDstTiledResource->GetUnderlyingResource(),
                1, // Number of regions
                &StartCoords,
                &FullResourceSize,
                nullptr, // Tile pool (can be null when unbinding)
                1, // Number of ranges
                &NullFlag,
                nullptr, // Tile pool start (ignored when flag is null)
                &NumTilesRequired,
                D3D12_TILE_MAPPING_FLAGS(Flags));
        }
        // The destination now shares the source's tile pool.
        pDstTiledResource->m_TiledResource.m_pTilePool = pTilePool;

        GetCommandQueue(commandListType)->CopyTileMappings(pDst, reinterpret_cast(pDstStartCoords), pSrc, reinterpret_cast(pSrcStartCoords), reinterpret_cast(pTileRegion), D3D12_TILE_MAPPING_FLAGS(Flags));
    });
}

//----------------------------------------------------------------------------------------------------------------------------------
// Records a CopyTiles on the graphics command list between a tiled resource and a linear buffer.
//
// The copy direction is selected by Flags: TILE_COPY_LINEAR_BUFFER_TO_SWIZZLED_TILED_RESOURCE
// copies pBuffer -> pResource; otherwise TILE_COPY_SWIZZLED_TILED_RESOURCE_TO_LINEAR_BUFFER is
// asserted and the copy goes pResource -> pBuffer. Only the subresources of pResource touched
// by the tile region are transitioned.
void TRANSLATION_API ImmediateContext::CopyTiles(Resource* pResource, _In_ const D3D12_TILED_RESOURCE_COORDINATE* pStartCoords, _In_ const D3D12_TILE_REGION_SIZE* pTileRegion, Resource* pBuffer, UINT64 BufferOffset, TILE_COPY_FLAG Flags)
{
    PreRender(COMMAND_LIST_TYPE::GRAPHICS);

    Resource *pSrc, *pDst;
    D3D12_RESOURCE_STATES StateForTiledResource;
    if (Flags & TILE_COPY_LINEAR_BUFFER_TO_SWIZZLED_TILED_RESOURCE)
    {
        // Buffer is the source, tiled resource is the destination.
        StateForTiledResource = D3D12_RESOURCE_STATE_COPY_DEST;
        m_ResourceStateManager.TransitionResource(pBuffer, D3D12_RESOURCE_STATE_COPY_SOURCE);
        pSrc = pBuffer;
        pDst = pResource;
    }
    else
    {
        // Tiled resource is the source, buffer is the destination.
        assert(Flags & TILE_COPY_SWIZZLED_TILED_RESOURCE_TO_LINEAR_BUFFER);
        StateForTiledResource = D3D12_RESOURCE_STATE_COPY_SOURCE;
        m_ResourceStateManager.TransitionResource(pBuffer, D3D12_RESOURCE_STATE_COPY_DEST);
        pSrc = pResource;
        pDst = pBuffer;
    }

    {
        // Enumerate the subresources covered by the tile region and transition each of them.
        CTileSubresourceSubset TileSubset(
            *reinterpret_cast(pStartCoords),
            *reinterpret_cast(pTileRegion),
            pResource->Parent()->ResourceDimension11(),
            reinterpret_cast(pResource->m_TiledResource.m_SubresourceTiling.begin()),
            pResource->AppDesc()->MipLevels(),
            pResource->m_TiledResource.m_NumStandardMips);
        for (UINT Subresource : TileSubset)
        {
            m_ResourceStateManager.TransitionSubresource(pResource, Subresource, StateForTiledResource);
        }
    }
    m_ResourceStateManager.ApplyAllResourceTransitions();

    auto pAPIResource = pResource->GetUnderlyingResource();
    auto pAPIBuffer = pBuffer->GetUnderlyingResource();
    GetGraphicsCommandList()->CopyTiles(pAPIResource, reinterpret_cast(pStartCoords), reinterpret_cast(pTileRegion), pAPIBuffer, BufferOffset, D3D12_TILE_COPY_FLAGS(Flags));

    PostCopy(pSrc, 0, pDst, 0, pSrc->NumSubresources());
}

//----------------------------------------------------------------------------------------------------------------------------------
// Uploads CPU data into the tiles of pResource described by pCoord/pRegion.
//
// pData is copied into a temporary upload-heap suballocation of
// pRegion->NumTiles * D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES bytes, a CopyTiles
// (linear buffer -> swizzled tiled resource) is recorded on the graphics command list,
// and the suballocation is released against the current graphics command list ID.
void TRANSLATION_API ImmediateContext::UpdateTiles(Resource* pResource, _In_ const D3D12_TILED_RESOURCE_COORDINATE* pCoord, _In_ const D3D12_TILE_REGION_SIZE* pRegion, const _In_ VOID* pData, UINT Flags)
{
#ifdef USE_PIX
    PIXScopedEvent(GetGraphicsCommandList(), 0ull, L"UpdateTiles");
#endif
    PreRender(COMMAND_LIST_TYPE::GRAPHICS);
    pResource->UsedInCommandList(COMMAND_LIST_TYPE::GRAPHICS, GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS));

    {
        // Transition every subresource covered by the tile region to COPY_DEST.
        CTileSubresourceSubset TileSubset(
            *reinterpret_cast(pCoord),
            *reinterpret_cast(pRegion),
            pResource->Parent()->ResourceDimension11(),
            reinterpret_cast(pResource->m_TiledResource.m_SubresourceTiling.begin()),
            pResource->AppDesc()->MipLevels(),
            pResource->m_TiledResource.m_NumStandardMips);
        for (UINT Subresource : TileSubset)
        {
            m_ResourceStateManager.TransitionSubresource(pResource, Subresource, D3D12_RESOURCE_STATE_COPY_DEST);
        }
    }
    m_ResourceStateManager.ApplyAllResourceTransitions();

    UINT64 DataSize = (UINT64)pRegion->NumTiles * D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES;
    auto UploadHeap = AcquireSuballocatedHeap(AllocatorHeapType::Upload, DataSize, ResourceAllocationContext::ImmediateContextThreadTemporary); // throw( _com_error )

    // Map with an empty read range: the CPU only writes to the upload memory.
    void* pMapped;
    CD3DX12_RANGE ReadRange(0, 0);
    HRESULT hr = UploadHeap.Map(0, &ReadRange, &pMapped);
    ThrowFailure(hr); // throw( _com_error )

    assert(DataSize < (SIZE_T)-1); // Can't map a buffer whose size is more than size_t
    memcpy(pMapped, pData, SIZE_T(DataSize));

    CD3DX12_RANGE WrittenRange(0, SIZE_T(DataSize));
    UploadHeap.Unmap(0, &WrittenRange);

    GetGraphicsCommandList()->CopyTiles(
        pResource->GetUnderlyingResource(),
        reinterpret_cast(pCoord),
        reinterpret_cast(pRegion),
        UploadHeap.GetResource(),
        UploadHeap.GetOffset(),
        D3D12_TILE_COPY_FLAGS(Flags) | D3D12_TILE_COPY_FLAG_LINEAR_BUFFER_TO_SWIZZLED_TILED_RESOURCE
        );

    ReleaseSuballocatedHeap(AllocatorHeapType::Upload, UploadHeap, GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS), COMMAND_LIST_TYPE::GRAPHICS);

    PostUpload();
}

//----------------------------------------------------------------------------------------------------------------------------------
// Issues an aliasing barrier between pBefore and pAfter (either may be null) on every
// command list type that either resource reports in its command list type mask.
// If neither resource reports any type, the barrier is issued on the graphics command list.
void TRANSLATION_API ImmediateContext::TiledResourceBarrier(Resource* pBefore, Resource* pAfter)
{
    // Union of the command list types reported by the two resources.
    UINT commandListTypeMask = COMMAND_LIST_TYPE_UNKNOWN_MASK;
    if (pAfter)
    {
        commandListTypeMask = pAfter->GetCommandListTypeMask();
    }
    if (pBefore)
    {
        commandListTypeMask |= pBefore->GetCommandListTypeMask();
    }

    // defaulting to graphics explicitly
    if (commandListTypeMask == COMMAND_LIST_TYPE_UNKNOWN_MASK)
    {
        commandListTypeMask = COMMAND_LIST_TYPE_GRAPHICS_MASK;
    }

    // One barrier per command list type whose bit is set in the mask.
    for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++)
    {
        if (commandListTypeMask & (1 << i))
        {
            TiledResourceBarrierImpl((COMMAND_LIST_TYPE)i, pBefore, pAfter);
        }
    }
}
//---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::TiledResourceBarrierImpl(COMMAND_LIST_TYPE commandListType, Resource* pBefore, Resource* pAfter) { PreRender(commandListType); D3D12_RESOURCE_BARRIER barrierDesc = {}; barrierDesc.Type = D3D12_RESOURCE_BARRIER_TYPE_ALIASING; barrierDesc.Aliasing.pResourceBefore = (pBefore) ? pBefore->GetUnderlyingResource() : nullptr; barrierDesc.Aliasing.pResourceAfter = (pAfter) ? pAfter->GetUnderlyingResource() : nullptr; static_assert(static_cast(COMMAND_LIST_TYPE::MAX_VALID) == 3u, "ImmediateContext::TiledResourceBarrier must support all command list types."); switch (commandListType) { case COMMAND_LIST_TYPE::GRAPHICS: GetGraphicsCommandList()->ResourceBarrier(1, &barrierDesc); break; case COMMAND_LIST_TYPE::VIDEO_DECODE: GetVideoDecodeCommandList()->ResourceBarrier(1, &barrierDesc); break; case COMMAND_LIST_TYPE::VIDEO_PROCESS: GetVideoProcessCommandList()->ResourceBarrier(1, &barrierDesc); break; default: assert(0); } if (pBefore) { pBefore->UsedInCommandList(commandListType, GetCommandListID(commandListType)); } if (pAfter) { pAfter->UsedInCommandList(commandListType, GetCommandListID(commandListType)); } PostRender(commandListType); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::ResizeTilePool(Resource* pResource, UINT64 NewSize ) { // For simplicity, tile pools in 11on12 are grow-only, since decrementing refs during tile mapping operations would be prohibitively expensive UINT64 CurrentSize = 0; for (auto& Allocation : pResource->m_TilePool.m_Allocations) { CurrentSize += Allocation.m_Size; if (CurrentSize >= NewSize) return; // Done } static const UINT64 Alignment = 1024*1024*4; static_assert(!(Alignment & (Alignment - 1)), "Alignment must be a power of 2"); UINT64 SizeDiff = NewSize - 
CurrentSize; SizeDiff = Align(SizeDiff, Alignment); // Each additional tile pool will be a multiple of 4MB assert(SizeDiff < (UINT)-1); pResource->m_TilePool.m_Allocations.emplace_back(UINT(SizeDiff), UINT(CurrentSize / D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES)); // throw( bad_alloc ) auto TiledResourcesTier = m_caps.TiledResourcesTier; if (TiledResourcesTier != D3D12_TILED_RESOURCES_TIER_1) { auto& Allocation = pResource->m_TilePool.m_Allocations.back(); CD3DX12_HEAP_DESC HeapDesc(SizeDiff, GetHeapProperties(D3D12_HEAP_TYPE_DEFAULT)); HRESULT hr = m_pDevice12->CreateHeap( &HeapDesc, IID_PPV_ARGS(&Allocation.m_spUnderlyingBufferHeap) ); ThrowFailure(hr); } } unique_comptr ImmediateContext::AcquireTransitionableUploadBuffer(AllocatorHeapType HeapType, UINT64 Size) noexcept(false) { TDynamicBufferPool& Pool = GetBufferPool(HeapType); auto pfnCreateNew = [this, HeapType](UINT64 Size) -> unique_comptr // noexcept(false) { return std::move(AllocateHeap(Size, 0, HeapType)); }; UINT64 CurrentFence = GetCompletedFenceValue(CommandListType(HeapType)); return Pool.RetrieveFromPool(Size, CurrentFence, pfnCreateNew); // throw( _com_error ) } D3D12ResourceSuballocation ImmediateContext::AcquireSuballocatedHeapForResource(_In_ Resource* pResource, ResourceAllocationContext threadingContext) noexcept(false) { UINT64 ResourceSize = pResource->GetResourceSize(); // SRV buffers do not allow offsets to be specified in bytes but instead by number of Elements. This requires that the offset must // always be aligned to an element size, which cannot be predicted since buffers can be created as DXGI_FORMAT_UNKNOWN and SRVs // can later be created later with an arbitrary DXGI_FORMAT. 
To handle this, we don't allow a suballocated offset for this case bool bCannotBeOffset = (pResource->AppDesc()->BindFlags() & RESOURCE_BIND_SHADER_RESOURCE) && (pResource->AppDesc()->ResourceDimension() == D3D12_RESOURCE_DIMENSION_BUFFER); AllocatorHeapType HeapType = pResource->GetAllocatorHeapType(); return AcquireSuballocatedHeap(HeapType, ResourceSize, threadingContext, bCannotBeOffset); // throw( _com_error ) } //---------------------------------------------------------------------------------------------------------------------------------- D3D12ResourceSuballocation ImmediateContext::AcquireSuballocatedHeap(AllocatorHeapType HeapType, UINT64 Size, ResourceAllocationContext threadingContext, bool bCannotBeOffset) noexcept(false) { if (threadingContext == ResourceAllocationContext::ImmediateContextThreadTemporary) { UploadHeapSpaceAllocated(CommandListType(HeapType), Size); } auto &allocator = GetAllocator(HeapType); HeapSuballocationBlock suballocation = TryAllocateResourceWithFallback([&]() { auto block = allocator.Allocate(Size, bCannotBeOffset); if (block.GetSize() == 0) { throw _com_error(E_OUTOFMEMORY); } return block; }, threadingContext); return D3D12ResourceSuballocation(allocator.GetInnerAllocation(suballocation), suballocation); } inline bool IsSyncPointLessThanOrEqual(UINT64(&lhs)[(UINT)COMMAND_LIST_TYPE::MAX_VALID], UINT64(&rhs)[(UINT)COMMAND_LIST_TYPE::MAX_VALID]) { for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; ++i) { if (lhs[i] > rhs[i]) return false; } return true; } //---------------------------------------------------------------------------------------------------------------------------------- bool ImmediateContext::ResourceAllocationFallback(ResourceAllocationContext threadingContext) { if (TrimDeletedObjects()) { return true; } UINT64 SyncPoints[2][(UINT)COMMAND_LIST_TYPE::MAX_VALID]; bool SyncPointExists[2]; { auto DeletionManagerLocked = m_DeferredDeletionQueueManager.GetLocked(); SyncPointExists[0] = 
DeletionManagerLocked->GetFenceValuesForObjectDeletion(SyncPoints[0]); SyncPointExists[1] = DeletionManagerLocked->GetFenceValuesForSuballocationDeletion(SyncPoints[1]); } // If one is strictly less than the other, wait just for that one. if (SyncPointExists[0] && SyncPointExists[1]) { if (IsSyncPointLessThanOrEqual(SyncPoints[0], SyncPoints[1])) { SyncPointExists[1] = false; } else if (IsSyncPointLessThanOrEqual(SyncPoints[1], SyncPoints[0])) { SyncPointExists[0] = false; } } const bool ImmediateContextThread = threadingContext != ResourceAllocationContext::FreeThread; auto WaitForSyncPoint = [&](UINT64(&SyncPoint)[(UINT)COMMAND_LIST_TYPE::MAX_VALID]) { for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; ++i) { auto CommandListManager = GetCommandListManager((COMMAND_LIST_TYPE)i); if (CommandListManager) { CommandListManager->WaitForFenceValueInternal(ImmediateContextThread, SyncPoint[i]); // throws } } }; //If index == 0 we are checking for object deletion, else subobject deletion auto WasMemoryFreed = [&](int index) -> bool { // DeferredDeletionQueueManager::TrimDeletedObjects() is the only place where we pop() // items from the deletion queues. This means that, if the sync points are different after // the WaitForSyncPoint call, we must have called TrimDeletedObjects and freed some memory. 
UINT64 newSyncPoint[(UINT)COMMAND_LIST_TYPE::MAX_VALID]; auto DeletionManagerLocked = m_DeferredDeletionQueueManager.GetLocked(); bool newSyncPointExists = false; if (index == 0) { newSyncPointExists = DeletionManagerLocked->GetFenceValuesForObjectDeletion(newSyncPoint); } else { newSyncPointExists = DeletionManagerLocked->GetFenceValuesForSuballocationDeletion(newSyncPoint); } constexpr size_t numBytes = sizeof(UINT64) * (size_t)COMMAND_LIST_TYPE::MAX_VALID; return !newSyncPointExists || memcmp(&SyncPoints[index], &newSyncPoint, numBytes); }; bool freedMemory = false; if (SyncPointExists[0]) { WaitForSyncPoint(SyncPoints[0]); // throws freedMemory = WasMemoryFreed(0); } if (SyncPointExists[1]) { WaitForSyncPoint(SyncPoints[1]); // throws freedMemory |= WasMemoryFreed(1); } // If we've already freed up memory go ahead and return true, else try to Trim now and return that result return freedMemory || TrimDeletedObjects(); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::ReturnAllBuffersToPool(Resource& UnderlyingResource) noexcept { if (!UnderlyingResource.m_Identity) { return; } if (!UnderlyingResource.m_Identity->m_bOwnsUnderlyingResource) { assert(UnderlyingResource.m_Identity->m_spUnderlyingResource.get() == nullptr); AllocatorHeapType HeapType = UnderlyingResource.GetAllocatorHeapType(); if (!UnderlyingResource.m_Identity->m_suballocation.IsInitialized()) { return; } assert(UnderlyingResource.AppDesc()->CPUAccessFlags() != 0); ReleaseSuballocatedHeap( HeapType, UnderlyingResource.m_Identity->m_suballocation, UnderlyingResource.m_LastUsedCommandListID); } } // This is for cases where we're copying a small subrect from one surface to another, // specifically when we only want to copy the first part of each row. 
void MemcpySubresourceWithCopySize( _In_ const D3D12_MEMCPY_DEST* pDest, _In_ const D3D12_SUBRESOURCE_DATA* pSrc, SIZE_T /*RowSizeInBytes*/, UINT CopySize, UINT NumRows, UINT NumSlices) { for (UINT z = 0; z < NumSlices; ++z) { BYTE* pDestSlice = reinterpret_cast(pDest->pData) + pDest->SlicePitch * z; const BYTE* pSrcSlice = reinterpret_cast(pSrc->pData) + pSrc->SlicePitch * z; for (UINT y = 0; y < NumRows; ++y) { memcpy(pDestSlice + pDest->RowPitch * y, pSrcSlice + pSrc->RowPitch * y, CopySize); } } } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::ReturnTransitionableBufferToPool(AllocatorHeapType HeapType, UINT64 Size, unique_comptr&& spResource, UINT64 FenceValue) noexcept { TDynamicBufferPool& Pool = GetBufferPool(HeapType); Pool.ReturnToPool( Size, std::move(spResource), FenceValue); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::ReleaseSuballocatedHeap(AllocatorHeapType HeapType, D3D12ResourceSuballocation &resource, UINT64 FenceValue, COMMAND_LIST_TYPE commandListType) noexcept { auto &allocator = GetAllocator(HeapType); m_DeferredDeletionQueueManager.GetLocked()->AddSuballocationToQueue(resource.GetBufferSuballocation(), allocator, commandListType, FenceValue); resource.Reset(); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::ReleaseSuballocatedHeap(AllocatorHeapType HeapType, D3D12ResourceSuballocation &resource, const UINT64 FenceValues[]) noexcept { auto &allocator = GetAllocator(HeapType); m_DeferredDeletionQueueManager.GetLocked()->AddSuballocationToQueue(resource.GetBufferSuballocation(), allocator, FenceValues); resource.Reset(); } 
//----------------------------------------------------------------------------------------------------------------------------------
// Creates a staging-usage resource from pResource's creation arguments, to be used as the
// "rename cookie" for a dynamic resource. The new resource is recorded in m_RenamesInFlight
// and returned as a raw pointer.
// NOTE(review): the returned pointer presumably stays alive through the m_RenamesInFlight
// entry until DeleteRenameCookie is called — confirm against unique_comptr semantics.
Resource* TRANSLATION_API ImmediateContext::CreateRenameCookie(Resource* pResource, ResourceAllocationContext threadingContext)
{
    assert(pResource->GetEffectiveUsage() == RESOURCE_USAGE_DYNAMIC);

    auto creationArgsCopy = pResource->m_creationArgs;
    creationArgsCopy.m_appDesc.m_usage = RESOURCE_USAGE_STAGING; // Make sure it's suballocated
    creationArgsCopy.m_heapDesc.Properties = CD3DX12_HEAP_PROPERTIES(Resource::GetD3D12HeapType(RESOURCE_USAGE_STAGING, creationArgsCopy.m_appDesc.CPUAccessFlags()), GetNodeMask(), GetNodeMask());

    // Strip video flags which don't make sense on the staging buffer in D3D12 and trip up allocation logic that follows.
    creationArgsCopy.m_appDesc.m_bindFlags &= ~(RESOURCE_BIND_DECODER | RESOURCE_BIND_VIDEO_ENCODER | RESOURCE_BIND_CAPTURE);
    creationArgsCopy.m_flags11.BindFlags &= ~(D3D11_BIND_DECODER | D3D11_BIND_VIDEO_ENCODER);

    // Inherit the heap type from the previous resource (which may account for the video flags stripped above).
    creationArgsCopy.m_heapType = pResource->GetAllocatorHeapType();

    // TODO: See if there's a good way to cache these guys.
    unique_comptr renameResource = Resource::CreateResource(this, creationArgsCopy, threadingContext);
    renameResource->ZeroConstantBufferPadding();

    // The cookie must land in the same allocator heap as the resource it will replace.
    assert(renameResource->GetAllocatorHeapType() == AllocatorHeapType::Upload ||
           renameResource->GetAllocatorHeapType() == AllocatorHeapType::Decoder);
    assert(renameResource->GetAllocatorHeapType() == pResource->GetAllocatorHeapType());

    m_RenamesInFlight.GetLocked()->emplace_back(renameResource.get());
    return renameResource.get();
}

//----------------------------------------------------------------------------------------------------------------------------------
// Rotates the resource identities of pResource and pRenameResource via RotateResourceIdentities.
void TRANSLATION_API ImmediateContext::Rename(Resource* pResource, Resource* pRenameResource)
{
    // Hold pRenameResource in a unique_comptr for the duration of the rotation.
    unique_comptr renameResource(pRenameResource);

    Resource* rotate[2] = { pResource, renameResource.get() };
    RotateResourceIdentities(rotate, 2);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Applies a rename by copying from pRenameResource into pResource instead of rotating identities.
// DirtyPlaneMask has one bit per plane subresource; when every plane is dirty the whole
// resource is copied, otherwise only the dirty planes are copied. Predication is disabled
// for the duration of the copies.
void TRANSLATION_API ImmediateContext::RenameViaCopy(Resource* pResource, Resource* pRenameResource, UINT DirtyPlaneMask)
{
#ifdef USE_PIX
    PIXSetMarker(GetGraphicsCommandList(), 0ull, L"Rename resource via copy");
#endif
    unique_comptr renameResource(pRenameResource);

    // Only single-mip, single-slice resources are handled here.
    assert(pResource->AppDesc()->MipLevels() == 1 && pResource->AppDesc()->ArraySize() == 1);

    CDisablePredication DisablePredication(this);

    const UINT8 PlaneCount = (pResource->SubresourceMultiplier() * pResource->AppDesc()->NonOpaquePlaneCount());
    // All PlaneCount low bits set means every plane was written.
    const bool EntireResourceDirty = (DirtyPlaneMask == (1u << PlaneCount) - 1u);
    if (EntireResourceDirty)
    {
        ResourceCopy(pResource, renameResource.get());
    }
    else
    {
        for (UINT subresource = 0; subresource < PlaneCount; ++subresource)
        {
            if (DirtyPlaneMask & (1 << subresource))
            {
                ResourceCopyRegion(pResource, subresource, 0, 0, 0, renameResource.get(), subresource, nullptr);
            }
        }
    }
}
//---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::DeleteRenameCookie(Resource* pRenameResource) { auto LockedContainer = m_RenamesInFlight.GetLocked(); auto iter = std::find_if(LockedContainer->begin(), LockedContainer->end(), [pRenameResource](unique_comptr const& r) { return r.get() == pRenameResource; }); assert(iter != LockedContainer->end()); // The only scenario where video is relevant here is for decode bitstream buffers. All other instances of // resource renaming are Map(DISCARD) graphics operations. COMMAND_LIST_TYPE CmdListType = pRenameResource->GetAllocatorHeapType() == AllocatorHeapType::Decoder ? COMMAND_LIST_TYPE::VIDEO_DECODE : COMMAND_LIST_TYPE::GRAPHICS; pRenameResource->UsedInCommandList(CmdListType, GetCommandListID(CmdListType)); LockedContainer->erase(iter); } //---------------------------------------------------------------------------------------------------------------------------------- bool TRANSLATION_API ImmediateContext::MapDiscardBuffer(Resource* pResource, UINT Subresource, MAP_TYPE MapType, bool DoNotWait, _In_opt_ const D3D12_BOX *pReadWriteRange, MappedSubresource* pMap ) { #ifdef USE_PIX PIXSetMarker(0ull, L"Map(DISCARD) buffer"); #endif assert(pResource->NumSubresources() == 1 && pResource->GetEffectiveUsage() == RESOURCE_USAGE_DYNAMIC); assert(pResource->UnderlyingResourceIsSuballocated()); bool bNeedRename = false; { auto& currentState = pResource->m_Identity->m_currentState; if (currentState.IsExclusiveState(Subresource)) { bNeedRename = currentState.GetExclusiveSubresourceState(Subresource).FenceValue > 0; } else { auto& sharedState = currentState.GetSharedSubresourceState(Subresource); bNeedRename = std::any_of(std::begin(sharedState.FenceValues), std::end(sharedState.FenceValues), [](UINT64 Value) { return Value > 0; }); } } if (bNeedRename) { if 
(!pResource->WaitForOutstandingResourcesIfNeeded(DoNotWait)) { return false; } auto cookie = CreateRenameCookie(pResource, ResourceAllocationContext::ImmediateContextThreadLongLived); Rename(pResource, cookie); DeleteRenameCookie(cookie); } return MapUnderlying(pResource, Subresource, MapType, pReadWriteRange, pMap); } //---------------------------------------------------------------------------------------------------------------------------------- bool TRANSLATION_API ImmediateContext::MapDynamicTexture(Resource* pResource, UINT Subresource, MAP_TYPE MapType, bool DoNotWait, _In_opt_ const D3D12_BOX *pReadWriteRange, MappedSubresource* pMap ) { #ifdef USE_PIX PIXScopedEvent(GetGraphicsCommandList(), 0ull, L"Map of non-mappable resource"); #endif assert(pResource->GetEffectiveUsage() == RESOURCE_USAGE_DYNAMIC); assert(MapType != MAP_TYPE_WRITE_NOOVERWRITE); UINT MipIndex, PlaneIndex, ArrayIndex; pResource->DecomposeSubresource(Subresource, MipIndex, ArrayIndex, PlaneIndex); const bool bNeedsReadbackCopy = MapType == MAP_TYPE_READ || MapType == MAP_TYPE_READWRITE // Note that MAP_TYPE_WRITE could be optimized to keep around a copy of the last-written // data and simply re-upload that on unmap, rather than doing a full read-modify-write loop // What we can't do, is skip the readback, because we don't have the current contents available // to be modified, and this isn't a discard operation. // Currently the only scenario that hits this is a shared vertex/index buffer in 9on12, so we don't care yet. || MapType == MAP_TYPE_WRITE; // If the app uses DoNotWait with a read flag, the translation layer is guaranteed to need to do GPU work // in order to copy the GPU data to a readback heap. As a result the first call to map will initiate the copy // and return that a draw is still in flight. The next map that's called after the copy is finished will // succeed. 
bool bReadbackCopyInFlight = pResource->GetCurrentCpuHeap(Subresource) != nullptr && !pResource->GetDynamicTextureData(Subresource).AnyPlaneMapped(); if (bReadbackCopyInFlight) { // If an app modifies the resource after a readback copy has been initiated but not mapped again, // make sure to invalidate the old copy so that the app auto& exclusiveState = pResource->m_Identity->m_currentState.GetExclusiveSubresourceState(Subresource); const bool bPreviousCopyInvalid = // If the app changed it's mind and decided it doesn't need to readback anymore, // we can throw out the copy since they'll be modifying the resource anyways !bNeedsReadbackCopy || // The copy was done on the graphics queue so it's been modified // if the last write state was outside of graphics or newer than the // last exclusive state's fence value exclusiveState.CommandListType != COMMAND_LIST_TYPE::GRAPHICS || exclusiveState.FenceValue > pResource->GetLastCopyCommandListID(Subresource); if (bPreviousCopyInvalid) { pResource->SetCurrentCpuHeap(Subresource, nullptr); bReadbackCopyInFlight = false; } } // For planar textures, the upload buffer is created for all planes when all planes // were previously not mapped if (!pResource->GetDynamicTextureData(Subresource).AnyPlaneMapped()) { if (!bReadbackCopyInFlight) { assert(pResource->GetEffectiveUsage() == RESOURCE_USAGE_DYNAMIC); auto desc12 = pResource->m_creationArgs.m_desc12; auto& Placement = pResource->GetSubresourcePlacement(Subresource); desc12.MipLevels = 1; desc12.Width = Placement.Footprint.Width; desc12.Height = Placement.Footprint.Height; desc12.DepthOrArraySize = static_cast( desc12.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? Placement.Footprint.Depth : 1); RESOURCE_CPU_ACCESS cpuAccess = (bNeedsReadbackCopy ? RESOURCE_CPU_ACCESS_READ : RESOURCE_CPU_ACCESS_NONE) | (MapType != MAP_TYPE_READ ? 
RESOURCE_CPU_ACCESS_WRITE : RESOURCE_CPU_ACCESS_NONE); auto creationArgsCopy = pResource->m_creationArgs; creationArgsCopy.m_appDesc = AppResourceDesc(desc12, RESOURCE_USAGE_STAGING, cpuAccess, RESOURCE_BIND_NONE); UINT64 resourceSize = 0; m_pDevice12->GetCopyableFootprints(&desc12, 0, creationArgsCopy.m_appDesc.NonOpaquePlaneCount(), 0, nullptr, nullptr, nullptr, &resourceSize); creationArgsCopy.m_heapDesc = CD3DX12_HEAP_DESC(resourceSize, Resource::GetD3D12HeapType(RESOURCE_USAGE_STAGING, cpuAccess)); creationArgsCopy.m_heapType = AllocatorHeapType::None; creationArgsCopy.m_flags11.BindFlags = 0; creationArgsCopy.m_flags11.MiscFlags = 0; creationArgsCopy.m_flags11.CPUAccessFlags = (bNeedsReadbackCopy ? D3D11_CPU_ACCESS_READ : 0) | (MapType != MAP_TYPE_READ ? D3D11_CPU_ACCESS_WRITE : 0); creationArgsCopy.m_flags11.StructureByteStride = 0; unique_comptr renameResource = Resource::CreateResource(this, creationArgsCopy, ResourceAllocationContext::FreeThread); pResource->SetCurrentCpuHeap(Subresource, renameResource.get()); CD3DX12_RANGE ReadRange(0, 0); // Map(DISCARD) is write-only D3D12_RANGE MappedRange = renameResource->GetSubresourceRange(0, pReadWriteRange); if (bNeedsReadbackCopy) { // Maintain the illusion that this data is read by the CPU directly from the mapped resource. CDisablePredication DisablePredication(this); UINT DstX = pReadWriteRange ? pReadWriteRange->left : 0u; UINT DstY = pReadWriteRange ? pReadWriteRange->top : 0u; UINT DstZ = pReadWriteRange ? pReadWriteRange->front : 0u; // Copy each plane. for (UINT iPlane = 0; iPlane < pResource->AppDesc()->NonOpaquePlaneCount(); ++iPlane) { UINT planeSubresource = pResource->GetSubresourceIndex(iPlane, MipIndex, ArrayIndex); ResourceCopyRegion(renameResource.get(), iPlane, DstX, DstY, DstZ, pResource, planeSubresource, pReadWriteRange); } pResource->SetLastCopyCommandListID(Subresource, GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS)); } } } { // Synchronize for the Map. 
Renamed resource has no mips or array slices. Resource* pRenameResource = pResource->GetCurrentCpuHeap(Subresource); assert(pRenameResource->AppDesc()->MipLevels() == 1); assert(pRenameResource->AppDesc()->ArraySize() == 1); if (!MapUnderlyingSynchronize(pRenameResource, PlaneIndex, MapType, DoNotWait, pReadWriteRange, pMap)) { return false; } } // Record that the given plane was mapped and is now dirty pResource->GetDynamicTextureData(Subresource).m_MappedPlaneRefCount[PlaneIndex]++; pResource->GetDynamicTextureData(Subresource).m_DirtyPlaneMask |= (1 << PlaneIndex); return true; } //---------------------------------------------------------------------------------------------------------------------------------- bool TRANSLATION_API ImmediateContext::MapUnderlying(Resource* pResource, UINT Subresource, MAP_TYPE MapType, _In_opt_ const D3D12_BOX *pReadWriteRange, MappedSubresource* pMap ) { assert(pResource->AppDesc()->Usage() == RESOURCE_USAGE_DYNAMIC || pResource->AppDesc()->Usage() == RESOURCE_USAGE_STAGING); assert(pResource->OwnsReadbackHeap() || pResource->UnderlyingResourceIsSuballocated()); auto pResource12 = pResource->GetUnderlyingResource(); void* pData; // Write-only means read range can be empty D3D12_RANGE MappedRange = pResource->GetSubresourceRange(Subresource, pReadWriteRange); D3D12_RANGE ReadRange = MapType == MAP_TYPE_WRITE ? 
CD3DX12_RANGE(0, 0) : MappedRange; HRESULT hr = pResource12->Map(0, &ReadRange, &pData); ThrowFailure(hr); // throw( _com_error ) auto& SubresourceInfo = pResource->GetSubresourcePlacement(Subresource); pMap->pData = reinterpret_cast(reinterpret_cast(pData) + MappedRange.Begin); pMap->RowPitch = SubresourceInfo.Footprint.RowPitch; pMap->DepthPitch = pResource->DepthPitch(Subresource); return true; } //---------------------------------------------------------------------------------------------------------------------------------- bool ImmediateContext::SynchronizeForMap(Resource* pResource, UINT Subresource, MAP_TYPE MapType, bool DoNotWait) { if (MapType == MAP_TYPE_READ || MapType == MAP_TYPE_READWRITE) { GetCommandListManager(COMMAND_LIST_TYPE::GRAPHICS)->ReadbackInitiated(); } auto& CurrentState = pResource->m_Identity->m_currentState; assert(CurrentState.SupportsSimultaneousAccess() || // Disabling simultaneous access for suballocated buffers but they're technically okay to map this way !pResource->GetIdentity()->m_bOwnsUnderlyingResource || // 9on12 special case pResource->OwnsReadbackHeap() || // For Map(DEFAULT) we should've made sure we're in common CurrentState.GetExclusiveSubresourceState(Subresource).State == D3D12_RESOURCE_STATE_COMMON || // Or we're not mapping the actual resource pResource->GetCurrentCpuHeap(Subresource) != nullptr); // We want to synchronize against the last command list to write to this subresource if // either the last op to it was a write (or in the same command list as a write), // or if we're only mapping it for read. bool bUseExclusiveState = MapType == MAP_TYPE_READ || CurrentState.IsExclusiveState(Subresource); if (bUseExclusiveState) { auto& ExclusiveState = CurrentState.GetExclusiveSubresourceState(Subresource); if (ExclusiveState.CommandListType == COMMAND_LIST_TYPE::UNKNOWN) { // Resource either has never been used, or at least never written to. 
return true; } return WaitForFenceValue(ExclusiveState.CommandListType, ExclusiveState.FenceValue, DoNotWait); // throws } else { // Wait for all reads of the resource to be done before allowing write access. auto& SharedState = CurrentState.GetSharedSubresourceState(Subresource); for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; ++i) { if (SharedState.FenceValues[i] > 0 && !WaitForFenceValue((COMMAND_LIST_TYPE)i, SharedState.FenceValues[i], DoNotWait)) // throws { return false; } } return true; } } //---------------------------------------------------------------------------------------------------------------------------------- bool ImmediateContext::WaitForFenceValue(COMMAND_LIST_TYPE type, UINT64 FenceValue, bool DoNotWait) { if (DoNotWait) { if (FenceValue == GetCommandListID(type)) { SubmitCommandList(type); // throws on CommandListManager::CloseCommandList(...) } if (FenceValue > GetCompletedFenceValue(type)) { return false; } return true; } else { return WaitForFenceValue(type, FenceValue); // throws } } //---------------------------------------------------------------------------------------------------------------------------------- bool TRANSLATION_API ImmediateContext::Map(Resource* pResource, UINT Subresource, MAP_TYPE MapType, bool DoNotWait, _In_opt_ const D3D12_BOX *pReadWriteRange, MappedSubresource* pMappedSubresource) { switch (pResource->AppDesc()->Usage()) { case RESOURCE_USAGE_DEFAULT: return MapDefault(pResource, Subresource, MapType, false, pReadWriteRange, pMappedSubresource); case RESOURCE_USAGE_DYNAMIC: switch (MapType) { case MAP_TYPE_READ: case MAP_TYPE_READWRITE: if (pResource->m_creationArgs.m_heapDesc.Properties.CPUPageProperty != D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE) { return MapUnderlyingSynchronize(pResource, Subresource, MapType, DoNotWait, pReadWriteRange, pMappedSubresource); } else { return MapDynamicTexture(pResource, Subresource, MapType, DoNotWait, pReadWriteRange, pMappedSubresource); } case MAP_TYPE_WRITE_DISCARD: 
if (pResource->m_creationArgs.m_heapDesc.Properties.CPUPageProperty != D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE) { return MapDiscardBuffer(pResource, Subresource, MapType, DoNotWait, pReadWriteRange, pMappedSubresource); } else { return MapDynamicTexture(pResource, Subresource, MapType, DoNotWait, pReadWriteRange, pMappedSubresource); } case MAP_TYPE_WRITE_NOOVERWRITE: assert(pResource->AppDesc()->CPUAccessFlags() == RESOURCE_CPU_ACCESS_WRITE); assert(pResource->AppDesc()->ResourceDimension() == D3D12_RESOURCE_DIMENSION_BUFFER); return MapUnderlying(pResource, Subresource, MapType, pReadWriteRange, pMappedSubresource); case MAP_TYPE_WRITE: assert(pResource->AppDesc()->ResourceDimension() == D3D12_RESOURCE_DIMENSION_BUFFER); if (pResource->m_creationArgs.m_heapDesc.Properties.CPUPageProperty != D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE) { return MapUnderlyingSynchronize(pResource, Subresource, MapType, DoNotWait, pReadWriteRange, pMappedSubresource); } else { return MapDynamicTexture(pResource, Subresource, MapType, DoNotWait, pReadWriteRange, pMappedSubresource); } } break; case RESOURCE_USAGE_STAGING: return MapUnderlyingSynchronize(pResource, Subresource, MapType, DoNotWait, pReadWriteRange, pMappedSubresource); case RESOURCE_USAGE_IMMUTABLE: default: assert(false); } return false; } void TRANSLATION_API ImmediateContext::Unmap(Resource* pResource, UINT Subresource, MAP_TYPE MapType, _In_opt_ const D3D12_BOX *pReadWriteRange) { switch (pResource->AppDesc()->Usage()) { case RESOURCE_USAGE_DEFAULT: UnmapDefault(pResource, Subresource, pReadWriteRange); break; case RESOURCE_USAGE_DYNAMIC: if (pResource->m_creationArgs.m_heapDesc.Properties.CPUPageProperty != D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE) { UnmapUnderlyingSimple(pResource, Subresource, pReadWriteRange); } else { UnmapDynamicTexture(pResource, Subresource, pReadWriteRange, MapType != MAP_TYPE_READ); } break; case RESOURCE_USAGE_STAGING: UnmapUnderlyingStaging(pResource, Subresource, pReadWriteRange); break; case 
RESOURCE_USAGE_IMMUTABLE: assert(false); break; } } //---------------------------------------------------------------------------------------------------------------------------------- bool TRANSLATION_API ImmediateContext::MapDefault(Resource* pResource, UINT Subresource, MAP_TYPE MapType, bool DoNotWait, _In_opt_ const D3D12_BOX *pReadWriteRange, MappedSubresource* pMap ) { auto pResource12 = pResource->GetUnderlyingResource(); if (pResource->Parent()->ResourceDimension12() != D3D12_RESOURCE_DIMENSION_BUFFER) { m_ResourceStateManager.TransitionSubresource(pResource, Subresource, D3D12_RESOURCE_STATE_COMMON, COMMAND_LIST_TYPE::UNKNOWN, SubresourceTransitionFlags::StateMatchExact | SubresourceTransitionFlags::NotUsedInCommandListIfNoStateChange); m_ResourceStateManager.ApplyAllResourceTransitions(); if (MapType == MAP_TYPE_WRITE_NOOVERWRITE) { MapType = MAP_TYPE_WRITE; } } // Write-only means read range can be empty bool bWriteOnly = (MapType == MAP_TYPE_WRITE || MapType == MAP_TYPE_WRITE_NOOVERWRITE); D3D12_RANGE MappedRange = pResource->GetSubresourceRange(Subresource, pReadWriteRange); D3D12_RANGE ReadRange = bWriteOnly ? CD3DX12_RANGE(0, 0) : MappedRange; // If we know we are not reading, pass an empty range, otherwise pass a null (full) range D3D12_RANGE* pNonStandardReadRange = bWriteOnly ? 
&ReadRange : nullptr; assert(pResource->AppDesc()->Usage() == RESOURCE_USAGE_DEFAULT); bool bSynchronizationSucceeded = true; bool bSyncronizationNeeded = MapType != MAP_TYPE_WRITE_NOOVERWRITE; if (bSyncronizationNeeded) { bSynchronizationSucceeded = SynchronizeForMap(pResource, Subresource, MapType, DoNotWait); } if (bSynchronizationSucceeded) { const D3D12_PLACED_SUBRESOURCE_FOOTPRINT &Placement = pResource->GetSubresourcePlacement(Subresource); if (pResource->m_Identity->m_bPlacedTexture || pResource->Parent()->ResourceDimension12() == D3D12_RESOURCE_DIMENSION_BUFFER) { // Map default row-major texture or buffer pResource12->Map(0, &ReadRange, &pMap->pData); pMap->pData = reinterpret_cast(pMap->pData) + MappedRange.Begin; pMap->RowPitch = Placement.Footprint.RowPitch; pMap->DepthPitch = pResource->DepthPitch(Subresource); } else if (pResource->Parent()->ApiTextureLayout12() == D3D12_TEXTURE_LAYOUT_64KB_STANDARD_SWIZZLE) { // Not supporting map calls on standard swizzle textures with a specified subrange assert(!pReadWriteRange); // Map default standard swizzle texture D3D11_TILE_SHAPE TileShape; CD3D11FormatHelper::GetTileShape(&TileShape, Placement.Footprint.Format, pResource->Parent()->ResourceDimension11(), pResource->AppDesc()->Samples()); pResource12->Map(Subresource, pNonStandardReadRange, &pMap->pData); // Logic borrowed from WARP UINT TileWidth = (Placement.Footprint.Width + TileShape.WidthInTexels - 1) / TileShape.WidthInTexels; UINT TileHeight = (Placement.Footprint.Height + TileShape.HeightInTexels - 1) / TileShape.HeightInTexels; pMap->RowPitch = TileWidth * D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; pMap->DepthPitch = TileWidth * TileHeight * D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES; } else { // Opaque: Simply cache the map pResource12->Map(Subresource, pNonStandardReadRange, nullptr); } } return bSynchronizationSucceeded; } 
//----------------------------------------------------------------------------------------------------------------------------------
// Thin forwarder to WriteToSubresource on the underlying resource object.
// NOTE(review): the cast of pDstBox has no target type as written; the template argument looks
// like it was lost — confirm against the upstream file.
void ImmediateContext::WriteToSubresource(Resource* pDstResource, UINT DstSubresource, _In_opt_ const D3D11_BOX* pDstBox, const void* pSrcData, UINT SrcRowPitch, UINT SrcDepthPitch)
{
    pDstResource->GetUnderlyingResource()->WriteToSubresource(DstSubresource, reinterpret_cast(pDstBox), pSrcData, SrcRowPitch, SrcDepthPitch);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Thin forwarder to ReadFromSubresource on the underlying resource object.
// NOTE(review): the cast of pSrcBox has no target type as written — confirm against upstream.
void ImmediateContext::ReadFromSubresource(void* pDstData, UINT DstRowPitch, UINT DstDepthPitch, Resource* pSrcResource, UINT SrcSubresource, _In_opt_ const D3D11_BOX* pSrcBox)
{
    pSrcResource->GetUnderlyingResource()->ReadFromSubresource(pDstData, DstRowPitch, DstDepthPitch, SrcSubresource, reinterpret_cast(pSrcBox));
}

//----------------------------------------------------------------------------------------------------------------------------------
// Synchronizes with outstanding work via SynchronizeForMap and, on success, maps the subresource.
//
// Resources without format-emulation staging data are mapped through MapUnderlying. Resources
// with staging data track a per-subresource map state and ref count:
//   - already mapped with the same state : the ref count is bumped and the existing mapping is
//                                          handed out again;
//   - not mapped (state None)            : the mapping is established (YV12 plane shuffle or
//                                          interleaved staging buffer), then state / ref count set;
//   - mapped with a different state      : ThrowFailure(E_FAIL).
//
// Returns the result of SynchronizeForMap; nothing is mapped when it is false.
// Calls ThrowFailure with E_OUTOFMEMORY if the staging allocation fails.
// NOTE(review): several casts in this function have no target type as written; the template
// arguments look like they were lost — confirm against the upstream file.
bool TRANSLATION_API ImmediateContext::MapUnderlyingSynchronize(Resource* pResource, UINT Subresource, MAP_TYPE MapType, bool DoNotWait, _In_opt_ const D3D12_BOX *pReadWriteRange, MappedSubresource* pMap )
{
    bool bSynchronizeSucceeded = SynchronizeForMap(pResource, Subresource, MapType, DoNotWait);
    if (bSynchronizeSucceeded)
    {
        if (pResource->m_FormatEmulationStagingData.empty())
        {
            MapUnderlying(pResource, Subresource, MapType, pReadWriteRange, pMap);
        }
        else
        {
            assert(!pResource->m_Identity->m_bOwnsUnderlyingResource);
            // READ and READWRITE map to their own states; every other map type is tracked as Write.
            Resource::EmulatedFormatMapState State =
                (MapType == MAP_TYPE_READ) ? Resource::EmulatedFormatMapState::Read :
                (MapType == MAP_TYPE_READWRITE) ? Resource::EmulatedFormatMapState::ReadWrite : Resource::EmulatedFormatMapState::Write;

            ZeroMemory(pMap, sizeof(*pMap));
            SIZE_T StagingBufferSize = 0;
            CD3DX12_RANGE WrittenRange(0, 0);
            void* pMapped = nullptr;

            if (pResource->m_FormatEmulationStagingData[Subresource].m_MapState == State)
            {
                // Nested map with the same access: just add a reference.
                ++pResource->m_FormatEmulationStagingData[Subresource].m_MapRefCount;
                if (pResource->GetFormatEmulation() == FormatEmulation::YV12)
                {
                    MapUnderlying(pResource, Subresource, MapType, pReadWriteRange, pMap);
                }
                else
                {
                    assert(pResource->GetFormatEmulation() == FormatEmulation::None);
                    // Only pData is filled here; the other pMap fields keep the zeros written by
                    // ZeroMemory above.
                    pMap->pData = pResource->GetFormatEmulationSubresourceStagingAllocation(Subresource).get();
                }
            }
            else if (pResource->m_FormatEmulationStagingData[Subresource].m_MapState == Resource::EmulatedFormatMapState::None)
            {
                if (pResource->GetFormatEmulation() == FormatEmulation::YV12)
                {
                    assert(pResource->AppDesc()->Format() == DXGI_FORMAT_NV12);
                    MapUnderlying(pResource, Subresource, MapType, pReadWriteRange, pMap);

                    UINT MipIndex, PlaneIndex, ArrayIndex;
                    pResource->DecomposeSubresource(Subresource, MipIndex, ArrayIndex, PlaneIndex);

                    // Only plane 1 is rearranged: its byte pairs are split in place into two
                    // consecutive planes laid out at half the original row pitch.
                    if (PlaneIndex == 1)
                    {
                        auto& UVPlacement = pResource->GetSubresourcePlacement(pResource->GetSubresourceIndex(1, MipIndex, ArrayIndex));
                        UINT YV12RowPitch = UVPlacement.Footprint.RowPitch >> 1;
                        UINT UVWidth = UVPlacement.Footprint.Width;
                        UINT UVHeight = UVPlacement.Footprint.Height;

                        BYTE* pData = static_cast(pMap->pData);
                        auto pbUTemp = m_SyncronousOpScrachSpace.GetBuffer(UVWidth * UVHeight);

                        // Pass 1: the first byte of each pair goes to the scratch buffer (U in the
                        // code's naming), the second byte is packed to the front of the mapped
                        // data (V).
                        for (UINT i = 0; i < UVPlacement.Footprint.Height; ++i)
                        {
                            BYTE* pSrcUV = pData + i * pMap->RowPitch;
                            BYTE* pDstV = pData + i * YV12RowPitch;
                            BYTE* pDstU = pbUTemp + i * UVWidth;

                            for (UINT j = 0; j < UVWidth; ++j)
                            {
                                *pDstU++ = *pSrcUV++;
                                *pDstV++ = *pSrcUV++;
                            }
                        }

                        // Pass 2: the saved U rows are copied back directly after the V rows.
                        for (UINT i = 0; i < UVHeight; ++i)
                        {
                            BYTE* pDstU = pData + (UVHeight + i) * YV12RowPitch;
                            BYTE* pSrcU = pbUTemp + i * UVWidth;
                            memcpy (pDstU, pSrcU, UVWidth);
                        }
                    }
                }
                else
                {
                    assert(!pReadWriteRange); // TODO: Add handling for DSVs that are only getting a subsection mapped
                    assert(pResource->AppDesc()->Usage() == RESOURCE_USAGE_STAGING);
                    assert(pResource->SubresourceMultiplier() == 2);
                    assert(pResource->GetFormatEmulation() == FormatEmulation::None);

                    // Size the interleaved staging buffer from the first extended subresource's
                    // footprint. pMap is also passed to the helper.
                    // NOTE(review): presumably the helper fills pMap's pitches — confirm against
                    // CalculateResourceSize.
                    {
                        auto& Placement = pResource->GetSubresourcePlacement(pResource->GetExtendedSubresourceIndex(Subresource, 0));
                        CD3D11FormatHelper::CalculateResourceSize(Placement.Footprint.Width, Placement.Footprint.Height, 1, pResource->AppDesc()->Format(), 1, 1, StagingBufferSize, reinterpret_cast(pMap));
                    }

                    // Allocate the staging buffer on first use.
                    auto& pStagingBuffer = pResource->GetFormatEmulationSubresourceStagingAllocation(Subresource);
                    if (!pStagingBuffer)
                    {
                        void* pData = AlignedHeapAlloc16(StagingBufferSize);
                        if (!pData)
                        {
                            ThrowFailure(E_OUTOFMEMORY);
                        }
                        pStagingBuffer.reset(reinterpret_cast(pData));
                    }

                    // The interleaving copy uses |= to write each plane's contents into the dest,
                    // which will break if the destination contains garbage
                    ZeroMemory(pStagingBuffer.get(), StagingBufferSize);

                    // Note: Always have to interleave the source contents
                    // Since MAP_WRITE does not imply old contents are discarded, we must provide
                    // the buffer filled with the correct previous contents
                    CD3DX12_RANGE ReadRange(pResource->GetSubresourceRange(pResource->GetExtendedSubresourceIndex(Subresource, 0)).Begin,
                                            pResource->GetSubresourceRange(pResource->GetExtendedSubresourceIndex(Subresource, 1)).End);
                    pResource->GetUnderlyingResource()->Map(0, &ReadRange, &pMapped);

                    // Read every plane back from the mapped resource into the staging buffer.
                    for (UINT i = 0; i < pResource->SubresourceMultiplier(); ++i)
                    {
                        UINT ExtendedSubresource = pResource->GetExtendedSubresourceIndex(Subresource, i);
                        auto& Placement = pResource->GetSubresourcePlacement(ExtendedSubresource);
                        BYTE* pSrcPlane = reinterpret_cast(pMapped) + Placement.Offset;

                        DXGI_FORMAT parentFormat = GetParentForFormat(pResource->AppDesc()->Format());
                        DepthStencilInterleavingReadback(parentFormat, i,
                                                         pSrcPlane, Placement.Footprint.RowPitch,
                                                         pStagingBuffer.get(), pMap->RowPitch,
                                                         Placement.Footprint.Width, Placement.Footprint.Height);
                    }

                    // Nothing was written through this temporary mapping, hence the empty range.
                    pResource->GetUnderlyingResource()->Unmap(0, &WrittenRange);
                    pMap->pData = pStagingBuffer.get();
                }
                pResource->m_FormatEmulationStagingData[Subresource].m_MapState = State;
                pResource->m_FormatEmulationStagingData[Subresource].m_MapRefCount = 1;
            }
            else
            {
                // Already mapped with a different access state.
                ThrowFailure(E_FAIL);
            }
        }
    }
    return bSynchronizeSucceeded;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Unmaps a subresource on the underlying D3D12 resource, choosing the written range passed to
// Unmap from the resource's CPU access flags and layout. Placed textures always unmap D3D12
// subresource 0.
void TRANSLATION_API ImmediateContext::UnmapDefault(Resource* pResource, UINT Subresource, _In_opt_ const D3D12_BOX *pReadWriteRange)
{
    auto pResource12 = pResource->GetUnderlyingResource();

    if (pResource->m_Identity->m_bPlacedTexture)
    {
        Subresource = 0;
    }

    // No way to tell whether the map that is being undone was a READ, WRITE, or both, so key off CPU access flags
    // to determine if data could've been written by the CPU
    bool bCouldBeWritten = (pResource->AppDesc()->CPUAccessFlags() & RESOURCE_CPU_ACCESS_WRITE) != 0;
    bool bRowMajorPattern = pResource->m_Identity->m_bPlacedTexture ||
                            pResource->Parent()->ResourceDimension12() == D3D12_RESOURCE_DIMENSION_BUFFER;
    // If we couldn't have written anything, pass an empty range
    D3D12_RANGE WrittenRange = bCouldBeWritten ?
        pResource->GetSubresourceRange(Subresource, pReadWriteRange) : CD3DX12_RANGE(0, 0);
    // If we know how much we could've written, pass the range. If we know we didn't, pass an empty range. Otherwise, pass null.
    D3D12_RANGE* pWrittenRange = bRowMajorPattern || !bCouldBeWritten ?
        &WrittenRange : nullptr;
    pResource12->Unmap(Subresource, pWrittenRange);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Unmaps a DYNAMIC or STAGING resource (asserted) by calling Unmap on D3D12 subresource 0 of
// the underlying resource. The written range is always the range computed for Subresource /
// pReadWriteRange.
void TRANSLATION_API ImmediateContext::UnmapUnderlyingSimple(Resource* pResource, UINT Subresource, _In_opt_ const D3D12_BOX *pReadWriteRange)
{
    assert(pResource->AppDesc()->Usage() == RESOURCE_USAGE_DYNAMIC || pResource->AppDesc()->Usage() == RESOURCE_USAGE_STAGING);
    assert(pResource->OwnsReadbackHeap() || pResource->UnderlyingResourceIsSuballocated());

    auto pResource12 = pResource->GetUnderlyingResource();
    D3D12_RANGE WrittenRange = pResource->GetSubresourceRange(Subresource, pReadWriteRange);
    pResource12->Unmap(0, &WrittenRange);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Unmaps a DYNAMIC or STAGING resource (asserted), undoing the format-emulation work done when
// it was mapped.
//
//   - No staging data        : plain Unmap, with a written range derived from CPU access flags.
//   - Map state is None      : ThrowFailure(E_FAIL) (unmap without a matching map).
//   - Ref count drops to 0   : writes the emulated data back (YV12 plane re-interleave, or
//                              depth/stencil de-interleave from the staging buffer, which is then
//                              freed) and resets the map state to None.
//   - Ref count still > 0    : nothing else happens.
void TRANSLATION_API ImmediateContext::UnmapUnderlyingStaging(Resource* pResource, UINT Subresource, _In_opt_ const D3D12_BOX *pReadWriteRange)
{
    assert(pResource->AppDesc()->Usage() == RESOURCE_USAGE_DYNAMIC || pResource->AppDesc()->Usage() == RESOURCE_USAGE_STAGING);
    assert(pResource->OwnsReadbackHeap() || !pResource->m_Identity->m_bOwnsUnderlyingResource);

    if (pResource->m_FormatEmulationStagingData.empty())
    {
        auto pResource12 = pResource->GetUnderlyingResource();

        // No way to tell whether the map that is being undone was a READ, WRITE, or both, so key off CPU access flags
        // to determine if data could've been written by the CPU. If we couldn't have written anything, pass an empty range
        bool bCouldBeWritten = (pResource->AppDesc()->CPUAccessFlags() & RESOURCE_CPU_ACCESS_WRITE) != 0;
        D3D12_RANGE WrittenRange = bCouldBeWritten ?
            pResource->GetSubresourceRange(Subresource, pReadWriteRange) : CD3DX12_RANGE(0, 0);

        pResource12->Unmap(0, &WrittenRange);
    }
    else if (pResource->m_FormatEmulationStagingData[Subresource].m_MapState == Resource::EmulatedFormatMapState::None)
    {
        ThrowFailure(E_FAIL);
    }
    else if (--pResource->m_FormatEmulationStagingData[Subresource].m_MapRefCount == 0)
    {
        assert(!pReadWriteRange); // Not supporting mapped DSVs with subranges
        auto State = pResource->m_FormatEmulationStagingData[Subresource].m_MapState;

        // Data is written back unless the map was read-only; in-place emulation always writes back.
        bool bDoCopy = (State != Resource::EmulatedFormatMapState::Read) || pResource->IsInplaceFormatEmulation();

        if (bDoCopy)
        {
            CD3DX12_RANGE ReadRange(0, 0);
            void* pMapped;

            if (pResource->GetFormatEmulation() == FormatEmulation::YV12)
            {
                D3D12_RANGE Range;
                UINT MipIndex, PlaneIndex, ArrayIndex;
                pResource->DecomposeSubresource(Subresource, MipIndex, ArrayIndex, PlaneIndex);

                if (PlaneIndex == 1)
                {
                    auto& UVPlacement = pResource->GetSubresourcePlacement(Subresource);
                    UINT YV12RowPitch = UVPlacement.Footprint.RowPitch >> 1;
                    UINT UVWidth = UVPlacement.Footprint.Width;
                    UINT UVHeight = UVPlacement.Footprint.Height;
                    UINT UVRowPitch = UVPlacement.Footprint.RowPitch;

                    // A second, temporary mapping is taken for the shuffle; it is released by the
                    // Unmap at the end of this branch.
                    // NOTE(review): the cast below has no target type as written — confirm
                    // against upstream.
                    pResource->GetUnderlyingResource()->Map(0, &ReadRange, &pMapped);
                    BYTE* pData = static_cast(pMapped) + UVPlacement.Offset;
                    BYTE* pbVTemp = m_SyncronousOpScrachSpace.GetBuffer(UVWidth * UVHeight);

                    // Pass 1: save the V rows (stored first, at half row pitch) into scratch space,
                    // because pass 2 overwrites that region.
                    for (UINT i = 0; i < UVHeight; ++i)
                    {
                        BYTE* pDstV = pbVTemp + i * UVWidth;
                        BYTE* pSrcV = pData + i * YV12RowPitch;
                        memcpy (pDstV, pSrcV, UVWidth);
                    }

                    // Pass 2: rebuild the byte pairs (U first, then V) at the full row pitch.
                    for (UINT i = 0; i < UVHeight; ++i)
                    {
                        BYTE* pSrcV = pbVTemp + i * UVWidth;
                        BYTE* pSrcU = pData + (UVHeight + i ) * YV12RowPitch;
                        BYTE* pDstUV = pData + i * UVRowPitch;

                        for (UINT j = 0; j < UVWidth; ++j)
                        {
                            *pDstUV++ = *pSrcU++;
                            *pDstUV++ = *pSrcV++;
                        }
                    }

                    Range = pResource->GetSubresourceRange(Subresource);
                    pResource->GetUnderlyingResource()->Unmap(0, &Range);
                }

                // This Unmap has no matching Map inside this function.
                // NOTE(review): presumably it pairs with the MapUnderlying call made when the
                // subresource was mapped — verify.
                Range = pResource->GetSubresourceRange(Subresource);
                pResource->GetUnderlyingResource()->Unmap(0, &Range);
            }
            else
            {
                auto& pStagingBuffer = pResource->GetFormatEmulationSubresourceStagingAllocation(Subresource);
                assert(pStagingBuffer.get() != nullptr);
                assert(pResource->GetFormatEmulation() == FormatEmulation::None);

                // Row pitch of the interleaved staging data, computed from the app format and
                // the first extended subresource's width.
                UINT InterleavedRowPitch;
                {
                    auto& Placement = pResource->GetSubresourcePlacement(pResource->GetExtendedSubresourceIndex(Subresource, 0));
                    CD3D11FormatHelper::CalculateMinimumRowMajorRowPitch(pResource->AppDesc()->Format(), Placement.Footprint.Width, InterleavedRowPitch);
                }

                // The written range spans from the start of extended subresource 0 to the end of
                // extended subresource 1.
                CD3DX12_RANGE WrittenRange(pResource->GetSubresourceRange(pResource->GetExtendedSubresourceIndex(Subresource, 0)).Begin,
                                           pResource->GetSubresourceRange(pResource->GetExtendedSubresourceIndex(Subresource, 1)).End);
                pResource->GetUnderlyingResource()->Map(0, &ReadRange, &pMapped);

                // Split the staging buffer back into each plane of the mapped resource.
                // NOTE(review): the cast below has no target type as written — confirm
                // against upstream.
                for (UINT i = 0; i < pResource->SubresourceMultiplier(); ++i)
                {
                    UINT ExtendedSubresource = pResource->GetExtendedSubresourceIndex(Subresource, i);
                    auto& Placement = pResource->GetSubresourcePlacement(ExtendedSubresource);
                    BYTE* pDstPlane = reinterpret_cast(pMapped) + Placement.Offset;

                    DXGI_FORMAT parentFormat = GetParentForFormat(pResource->AppDesc()->Format());
                    DepthStencilDeInterleavingUpload(parentFormat, i,
                                                     pStagingBuffer.get(), InterleavedRowPitch,
                                                     pDstPlane, Placement.Footprint.RowPitch,
                                                     Placement.Footprint.Width, Placement.Footprint.Height);
                }

                pResource->GetUnderlyingResource()->Unmap(0, &WrittenRange);
                // The staging buffer is released once its contents have been uploaded.
                pStagingBuffer.reset(nullptr);
            }
        }
        else if (pResource->GetFormatEmulation() != FormatEmulation::None)
        {
            // No copy needed: just unmap with an empty written range.
            assert(pResource->GetFormatEmulation() == FormatEmulation::YV12);
            auto Range = CD3DX12_RANGE(0, 0);
            pResource->GetUnderlyingResource()->Unmap(0, &Range);
        }
        pResource->m_FormatEmulationStagingData[Subresource].m_MapState = Resource::EmulatedFormatMapState::None;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Unmaps one plane of a DYNAMIC resource (asserted) that was mapped through a separate CPU heap
// resource (GetCurrentCpuHeap).
//
//   bUploadMappedContents - true : unmap via UnmapUnderlyingStaging and, once no plane is still
//                                   mapped, copy the data into pResource with ResourceCopyRegion;
//                           false: unmap via UnmapUnderlyingSimple and copy nothing.
//
// The current CPU heap for the subresource is cleared once no plane remains mapped.
void TRANSLATION_API ImmediateContext::UnmapDynamicTexture(Resource* pResource, UINT Subresource, _In_opt_ const D3D12_BOX *pReadWriteRange, bool bUploadMappedContents)
{
#ifdef USE_PIX
    PIXScopedEvent(GetGraphicsCommandList(), 0ull, L"Unmap non-mappable resource");
#endif
    UINT MipIndex, PlaneIndex, ArrayIndex;
    pResource->DecomposeSubresource(Subresource, MipIndex, ArrayIndex, PlaneIndex);

    assert(pResource->AppDesc()->Usage() == RESOURCE_USAGE_DYNAMIC);
    assert(pResource->GetCurrentCpuHeap(Subresource) != nullptr);

    Resource* pRenameResource = pResource->GetCurrentCpuHeap(Subresource);

    // If multiple planes of the dynamic texture were mapped simultaneously, only copy
    // data from the upload buffer once all planes have been unmapped.
    assert(pResource->GetDynamicTextureData(Subresource).m_MappedPlaneRefCount[PlaneIndex] > 0);
    pResource->GetDynamicTextureData(Subresource).m_MappedPlaneRefCount[PlaneIndex]--;

    // The CPU heap resource is addressed by plane index, not by the original subresource index.
    if (bUploadMappedContents)
    {
        UnmapUnderlyingStaging(pRenameResource, PlaneIndex, pReadWriteRange);
    }
    else
    {
        UnmapUnderlyingSimple(pRenameResource, PlaneIndex, pReadWriteRange);
    }

    if (pResource->GetDynamicTextureData(Subresource).AnyPlaneMapped())
    {
        return;
    }

    if(bUploadMappedContents)
    {
        // Maintain the illusion that data is written by the CPU directly to this resource.
        CDisablePredication DisablePredication(this);

        UINT DstX = pReadWriteRange ? pReadWriteRange->left : 0u;
        UINT DstY = pReadWriteRange ? pReadWriteRange->top : 0u;
        UINT DstZ = pReadWriteRange ? pReadWriteRange->front : 0u;

        // Copy each plane.
        // Buffers are always copied; texture planes only when their bit is set in the dirty mask.
        for (UINT iPlane = 0; iPlane < pResource->AppDesc()->NonOpaquePlaneCount(); ++iPlane)
        {
            if (   pResource->Parent()->ResourceDimension12() == D3D12_RESOURCE_DIMENSION_BUFFER
                || pResource->GetDynamicTextureData(Subresource).m_DirtyPlaneMask & (1 << iPlane))
            {
                UINT planeSubresource = pResource->GetSubresourceIndex(iPlane, MipIndex, ArrayIndex);
                ResourceCopyRegion(pResource, planeSubresource, DstX, DstY, DstZ, pRenameResource, iPlane, pReadWriteRange);
            }
        }

        pResource->GetDynamicTextureData(Subresource).m_DirtyPlaneMask = 0;
    }

    pResource->SetCurrentCpuHeap(Subresource, nullptr);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Reports mip packing for a tiled resource: the packed mip count is the app's mip level count
// minus the resource's standard mip count; the tile count is read from m_TiledResource.
void TRANSLATION_API ImmediateContext::GetMipPacking(Resource* pResource, _Out_ UINT* pNumPackedMips, _Out_ UINT* pNumTilesForPackedMips )
{
    *pNumPackedMips = pResource->AppDesc()->MipLevels() - pResource->m_TiledResource.m_NumStandardMips;
    *pNumTilesForPackedMips = pResource->m_TiledResource.m_NumTilesForPackedMips;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Queries D3D12_FEATURE_FORMAT_SUPPORT on the D3D12 device. formatData.Format is read by the
// device and the support fields are filled in. Returns the device's HRESULT unchanged.
HRESULT TRANSLATION_API ImmediateContext::CheckFormatSupport(_Out_ D3D12_FEATURE_DATA_FORMAT_SUPPORT& formatData)
{
    return m_pDevice12->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &formatData, sizeof(formatData));
}

//----------------------------------------------------------------------------------------------------------------------------------
// Returns true only if the format-support query succeeds and reports
// D3D12_FORMAT_SUPPORT1_RENDER_TARGET for Format.
bool ImmediateContext::SupportsRenderTarget(DXGI_FORMAT Format)
{
    D3D12_FEATURE_DATA_FORMAT_SUPPORT SupportStruct = {};
    SupportStruct.Format = Format;

    return (
        SUCCEEDED(CheckFormatSupport(SupportStruct)) &&
        (SupportStruct.Support1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) == D3D12_FORMAT_SUPPORT1_RENDER_TARGET);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Queries the number of multisample quality levels for a format / sample count / flags
// combination. *pNumQualityLevels is set to 0 if the device query fails.
void TRANSLATION_API ImmediateContext::CheckMultisampleQualityLevels(DXGI_FORMAT format, UINT SampleCount, D3D12_MULTISAMPLE_QUALITY_LEVEL_FLAGS Flags, _Out_ UINT* pNumQualityLevels )
{
    D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS SupportStruct;
    SupportStruct.Format = format;
    SupportStruct.SampleCount = SampleCount;
    SupportStruct.Flags = Flags;
    HRESULT hr = m_pDevice12->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &SupportStruct, sizeof(SupportStruct));

    *pNumQualityLevels = SUCCEEDED(hr) ? SupportStruct.NumQualityLevels : 0;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Forwards an arbitrary feature query to the D3D12 device. Unlike the helpers above, the
// result is passed to ThrowFailure rather than being returned.
void TRANSLATION_API ImmediateContext::CheckFeatureSupport(D3D12_FEATURE Feature, _Inout_updates_bytes_(FeatureSupportDataSize)void* pFeatureSupportData, UINT FeatureSupportDataSize)
{
    ThrowFailure(m_pDevice12->CheckFeatureSupport(Feature, pFeatureSupportData, FeatureSupportDataSize));
}

//----------------------------------------------------------------------------------------------------------------------------------
// Scope guard: on construction clears the active predicate on the parent context (if a parent
// was supplied); the destructor restores it.
ImmediateContext::CDisablePredication::CDisablePredication(ImmediateContext* pParent) :
    m_pParent(pParent)
{
    if (m_pParent)
    {
        m_pParent->SetPredicationInternal(nullptr, false);
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Re-applies the predicate recorded in the parent's current state, if there is one, and marks
// it as used by the current graphics command list.
ImmediateContext::CDisablePredication::~CDisablePredication()
{
    // Restore the predicate
    if (m_pParent && m_pParent->m_CurrentState.m_pPredicate)
    {
        m_pParent->m_CurrentState.m_pPredicate->UsedInCommandList(COMMAND_LIST_TYPE::GRAPHICS, m_pParent->GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS));

        m_pParent->SetPredicationInternal(m_pParent->m_CurrentState.m_pPredicate, m_pParent->m_PredicateValue);
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Uploads Size bytes from pData into pDstResource at DstOffset by staging them in a suballocated
// upload heap and recording a CopyBufferRegion on the graphics command list.
//
// Size must not exceed the fixed 1024-byte staging allocation. This is only asserted, so a
// larger Size is not rejected in builds where assert is compiled out.
// Failures from acquiring or mapping the upload heap are thrown.
void ImmediateContext::CopyDataToBuffer(
    ID3D12Resource* pDstResource,
    UINT DstOffset,
    const void* pData,
    UINT Size
    ) noexcept(false)
{
    // this operation should not be predicated (even if the application has enabled predication at the D3D11 API)
    CDisablePredication DisablePredication(this);

    const UINT AlignedSize = 1024; // To ensure good pool re-use
    assert(Size <= AlignedSize);

    auto UploadHeap = AcquireSuballocatedHeap(AllocatorHeapType::Upload, AlignedSize, ResourceAllocationContext::ImmediateContextThreadTemporary); // throw( _com_error )

    // The CPU only writes this allocation, so the read range is empty and the written range
    // covers exactly Size bytes.
    void* pMapped;
    CD3DX12_RANGE ReadRange(0, 0);
    HRESULT hr = UploadHeap.Map(0, &ReadRange, &pMapped);
    ThrowFailure(hr); // throw( _com_error )

    memcpy(pMapped, pData, Size);

    CD3DX12_RANGE WrittenRange(0, Size);
    UploadHeap.Unmap(0, &WrittenRange);

    GetGraphicsCommandList()->CopyBufferRegion(
        pDstResource,
        DstOffset,
        UploadHeap.GetResource(),
        UploadHeap.GetOffset(),
        Size
        );

    AdditionalCommandsAdded(COMMAND_LIST_TYPE::GRAPHICS);

    // The allocation is released tagged with the current graphics command list ID.
    ReleaseSuballocatedHeap(
        AllocatorHeapType::Upload,
        UploadHeap,
        GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS),
        COMMAND_LIST_TYPE::GRAPHICS);

    PostUpload();
}

//----------------------------------------------------------------------------------------------------------------------------------
// Not expected to be called: asserts unconditionally and otherwise does nothing.
void TRANSLATION_API ImmediateContext::SetHardwareProtection(Resource*, INT)
{
    assert(false);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Intentionally a no-op.
void TRANSLATION_API ImmediateContext::SetHardwareProtectionState(BOOL)
{
}

//----------------------------------------------------------------------------------------------------------------------------------
// Rotates the identities of the given resources by repeatedly swapping neighbours
// (ppResources[i] swaps with the previously visited resource). Each visited resource has its
// uniqueness counters bumped, every binding that references it is marked dirty, and its state
// is transitioned for its bindings.
//
// NOTE(review): ppResources[0] is read before the loop, so the array must be non-empty even
// when Resources == 0 — confirm callers never pass an empty array.
void TRANSLATION_API ImmediateContext::RotateResourceIdentities(Resource* const* ppResources, UINT Resources)
{
#ifdef USE_PIX
    PIXSetMarker(0ull, L"Swap resource identities");
#endif
    Resource* pLastResource = ppResources[0];
    // The loop runs Resources times (i = 1..Resources) so the final resource is also processed;
    // the identity swap is skipped on the last iteration.
    for (UINT i = 1; i <= Resources; ++i)
    {
        ++pLastResource->m_AllUniqueness;
        ++pLastResource->m_SRVUniqueness;
        CResourceBindings& bindingState = pLastResource->m_currentBindings;

        // Set dirty bits for all bound SRVs of this resource
        auto pHead = &bindingState.m_ShaderResourceViewList;
        for (auto pCur = pHead->Flink; pCur != pHead; pCur = pCur->Flink)
        {
            auto& viewBindings = *CONTAINING_RECORD(pCur, CViewBindings, m_ViewBindingList);
            for (UINT stage = 0; stage < ShaderStageCount; ++stage)
            {
                auto& stageState = m_CurrentState.GetStageState((EShaderStage)stage);
                stageState.m_SRVs.SetDirtyBits(viewBindings.m_BindPoints[stage]);
            }
        }

        // Set dirty bits for all bound UAVs of this resource
        pHead = &bindingState.m_UnorderedAccessViewList;
        for (auto pCur = pHead->Flink; pCur != pHead; pCur = pCur->Flink)
        {
            auto& viewBindings = *CONTAINING_RECORD(pCur, CViewBindings, m_ViewBindingList);
            m_CurrentState.m_UAVs.SetDirtyBits(viewBindings.m_BindPoints[e_Graphics]);
            m_CurrentState.m_CSUAVs.SetDirtyBits(viewBindings.m_BindPoints[e_Compute]);
        }

        // If the resource is bound as a render target, set the RTV dirty bit
        if (bindingState.IsBoundAsRenderTarget())
        {
            m_DirtyStates |= e_RenderTargetsDirty;
        }

        // Handle buffer rotation too to simplify rename operations.
        if (bindingState.IsBoundAsVertexBuffer())
        {
            m_DirtyStates |= e_VertexBuffersDirty;
        }
        if (bindingState.IsBoundAsIndexBuffer())
        {
            m_DirtyStates |= e_IndexBufferDirty;
        }
        for (UINT stage = 0; stage < ShaderStageCount; ++stage)
        {
            auto& stageState = m_CurrentState.GetStageState((EShaderStage)stage);
            stageState.m_CBs.SetDirtyBits(bindingState.m_ConstantBufferBindings[stage]);
        }

        m_ResourceStateManager.TransitionResourceForBindings(pLastResource);

        if (i < Resources)
        {
            Resource* pCurrentResource = ppResources[i];
            pCurrentResource->SwapIdentities(*pLastResource);
            pLastResource = pCurrentResource;
        }
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Requests a transition of the subresources referenced by pView to desiredState.
void ImmediateContext::TransitionResourceForView(ViewBase* pView, D3D12_RESOURCE_STATES desiredState) noexcept
{
    m_ResourceStateManager.TransitionSubresources(pView->m_pResource, pView->m_subresources, desiredState);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Requests a binding-driven transition of the subresources referenced by pView.
void ImmediateContext::TransitionResourceForBindings(ViewBase* pView) noexcept
{
    m_ResourceStateManager.TransitionSubresourcesForBindings(pView->m_pResource, pView->m_subresources);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Requests a binding-driven transition of the whole resource.
void ImmediateContext::TransitionResourceForBindings(Resource* pResource) noexcept
{
    m_ResourceStateManager.TransitionResourceForBindings(pResource);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Puts a shared resource into an exclusive COMMON state on the graphics command list type and
// applies the transition. If any subresource's exclusive-state fence value equals the current
// command list ID, PrepForCommandQueueSync is called once on the graphics command list manager.
//
// Returns S_OK, or the error code of a _com_error / E_OUTOFMEMORY for std::bad_alloc thrown by
// PrepForCommandQueueSync. Exceptions thrown before the try block are not translated.
HRESULT TRANSLATION_API ImmediateContext::ResolveSharedResource(Resource* pResource)
{
    m_ResourceStateManager.TransitionResource(pResource, D3D12_RESOURCE_STATE_COMMON, COMMAND_LIST_TYPE::GRAPHICS, SubresourceTransitionFlags::StateMatchExact | SubresourceTransitionFlags::ForceExclusiveState | SubresourceTransitionFlags::NotUsedInCommandListIfNoStateChange);
    m_ResourceStateManager.ApplyAllResourceTransitions();

    // All work referencing this resource needs to be submitted, not necessarily completed.
    // Even reads / shared access needs to be flushed, because afterwards, another process can write to this resource.
    auto& CurrentState = pResource->m_Identity->m_currentState;
    // When all subresources share one state, inspecting index 0 is sufficient.
    for (UINT i = 0; i < (CurrentState.AreAllSubresourcesSame() ? 1u : pResource->NumSubresources()); ++i)
    {
        auto& ExclusiveState = CurrentState.GetExclusiveSubresourceState(i);
        assert(ExclusiveState.IsMostRecentlyExclusiveState && ExclusiveState.CommandListType == COMMAND_LIST_TYPE::GRAPHICS);
        if (ExclusiveState.FenceValue == GetCommandListID(ExclusiveState.CommandListType))
        {
            try
            {
                GetCommandListManager(COMMAND_LIST_TYPE::GRAPHICS)->PrepForCommandQueueSync(); // throws
            }
            catch (_com_error& e)
            {
                return e.Error();
            }
            catch (std::bad_alloc&)
            {
                return E_OUTOFMEMORY;
            }
            // One sync covers every subresource, so stop scanning.
            break;
        }
    }
    return S_OK;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Retrieves the non-NT shared handle of a GDI-style shared resource (asserted) via the compat
// device's ReflectSharedProperties. Failures are thrown.
void TRANSLATION_API ImmediateContext::GetSharedGDIHandle(_In_ Resource *pResource, _Out_ HANDLE *pHandle)
{
    assert(pResource->Parent()->IsGDIStyleHandleShared());
    ThrowFailure(m_pCompatDevice->ReflectSharedProperties(pResource->GetUnderlyingResource(), D3D12_REFLECT_SHARED_PROPERTY_NON_NT_SHARED_HANDLE, pHandle, sizeof(*pHandle)));
}

//----------------------------------------------------------------------------------------------------------------------------------
// Creates an unnamed NT shared handle with GENERIC_ALL access for an NT-handle-shared resource
// (asserted), using the optional security attributes. Failures are thrown.
void TRANSLATION_API ImmediateContext::CreateSharedNTHandle(_In_ Resource *pResource, _Out_ HANDLE *pHandle, _In_opt_ SECURITY_ATTRIBUTES *pSA)
{
    assert(pResource->Parent()->IsNTHandleShared()); // Note: Not validated by this layer, but only called when this is true.
    ThrowFailure(m_pDevice12->CreateSharedHandle(pResource->GetUnderlyingResource(), pSA, GENERIC_ALL, nullptr, pHandle));
}

//----------------------------------------------------------------------------------------------------------------------------------
// Returns true when every deferred wait's fence has reached (completed value >=) the awaited
// value; an empty list is trivially satisfied.
// NOTE(review): std::vector has no element type as written; the template argument looks like
// it was lost — confirm against the upstream file.
bool RetiredObject::DeferredWaitsSatisfied(const std::vector& deferredWaits)
{
    for (auto& deferredWait : deferredWaits)
    {
        if (deferredWait.value > deferredWait.fence->GetCompletedValue())
        {
            return false;
        }
    }
    return true;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Single-command-list-type overload.
//   completionRequired == true  : ready once lastCommandListID <= the completed fence value.
//   completionRequired == false : ready once lastCommandListID <  the current command list ID.
// In both cases all deferred waits must additionally be satisfied.
bool RetiredObject::ReadyToDestroy(ImmediateContext* pContext, bool completionRequired, UINT64 lastCommandListID, COMMAND_LIST_TYPE commandListType, const std::vector& deferredWaits)
{
    bool readyToDestroy = true;

    if (completionRequired)
    {
        readyToDestroy = lastCommandListID <= pContext->GetCompletedFenceValue(commandListType);
    }
    else
    {
        readyToDestroy = lastCommandListID < pContext->GetCommandListID(commandListType);
    }

    if (readyToDestroy)
    {
        readyToDestroy = DeferredWaitsSatisfied(deferredWaits);
    }
    return readyToDestroy;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Multi-command-list-type overload: applies the same test as above to every command list type
// whose recorded ID is non-zero (a zero entry is skipped), stopping at the first type that is
// not ready. Deferred waits are only checked if all types pass.
bool RetiredObject::ReadyToDestroy(ImmediateContext* pContext, bool completionRequired, const UINT64 lastCommandListIDs[(UINT)COMMAND_LIST_TYPE::MAX_VALID], const std::vector& deferredWaits)
{
    bool readyToDestroy = true;

    if (completionRequired)
    {
        for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID && readyToDestroy; i++)
        {
            if (lastCommandListIDs[i] != 0)
            {
                readyToDestroy = lastCommandListIDs[i] <= pContext->GetCompletedFenceValue((COMMAND_LIST_TYPE)i);
            }
        }
    }
    else
    {
        for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID && readyToDestroy; i++)
        {
            if (lastCommandListIDs[i] != 0)
            {
                readyToDestroy = lastCommandListIDs[i] < pContext->GetCommandListID((COMMAND_LIST_TYPE)i);
            }
        }
    }

    if (readyToDestroy)
    {
        readyToDestroy = DeferredWaitsSatisfied(deferredWaits);
    }
    return readyToDestroy;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Returns the pipeline state currently recorded in the context state (may be null).
PipelineState* ImmediateContext::GetPipelineState()
{
    return m_CurrentState.m_pPSO;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Records a new current pipeline state and marks it dirty. Both root signatures are also marked
// dirty when either the old or new pipeline is null, or when their root signatures differ.
void TRANSLATION_API ImmediateContext::SetPipelineState(PipelineState* pPipeline)
{
    if (!m_CurrentState.m_pPSO || !pPipeline ||
        m_CurrentState.m_pPSO->GetRootSignature() != pPipeline->GetRootSignature())
    {
        m_DirtyStates |= e_GraphicsRootSignatureDirty | e_ComputeRootSignatureDirty;
    }

    m_CurrentState.m_pPSO = pPipeline;
    m_DirtyStates |= e_PipelineStateDirty;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Null-tolerant forwarder to Resource::ClearInputBindings.
void TRANSLATION_API ImmediateContext::ClearInputBindings(Resource* pResource)
{
    if (pResource)
    {
        pResource->ClearInputBindings();
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Null-tolerant forwarder to Resource::ClearOutputBindings.
void TRANSLATION_API ImmediateContext::ClearOutputBindings(Resource* pResource)
{
    if (pResource)
    {
        pResource->ClearOutputBindings();
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Unbinds the vertex buffer in the given slot; the vertex-buffer dirty bit is only set when
// UpdateBinding reports a change.
void ImmediateContext::ClearVBBinding(UINT slot)
{
    if (m_CurrentState.m_VBs.UpdateBinding(slot, nullptr, e_Graphics))
    {
        m_DirtyStates |= e_VertexBuffersDirty;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Unbinds the render target view in the given slot; the render-target dirty bit is only set
// when UpdateBinding reports a change.
void ImmediateContext::ClearRTVBinding(UINT slot)
{
    if (m_CurrentState.m_RTVs.UpdateBinding(slot, nullptr, e_Graphics))
    {
        m_DirtyStates |= e_RenderTargetsDirty;
    }
}
//---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::ClearDSVBinding() { if (m_CurrentState.m_DSVs.UpdateBinding(0, nullptr, e_Graphics)) { m_DirtyStates |= e_RenderTargetsDirty; } } //---------------------------------------------------------------------------------------------------------------------------------- DXGI_FORMAT ImmediateContext::GetParentForFormat(DXGI_FORMAT format) { return CD3D11FormatHelper::GetParentFormat(format); }; //---------------------------------------------------------------------------------------------------------------------------------- HRESULT TRANSLATION_API ImmediateContext::GetDeviceState() { return m_pDevice12->GetDeviceRemovedReason(); } //---------------------------------------------------------------------------------------------------------------------------------- TRANSLATION_API void ImmediateContext::Signal( _In_ Fence* pFence, UINT64 Value ) { if (m_pSyncOnlyQueue) { Flush(D3D12TranslationLayer::COMMAND_LIST_TYPE_ALL_MASK); for (UINT listTypeIndex = 0; listTypeIndex < static_cast(COMMAND_LIST_TYPE::MAX_VALID); ++listTypeIndex) { auto pManager = m_CommandLists[listTypeIndex].get(); if (!pManager) { continue; } UINT64 LastSignaledValue = pManager->GetCommandListID() - 1; ThrowFailure(m_pSyncOnlyQueue->Wait(pManager->GetFence()->Get(), LastSignaledValue)); pFence->UsedInCommandList(static_cast(listTypeIndex), LastSignaledValue); } ThrowFailure(m_pSyncOnlyQueue->Signal(pFence->Get(), Value)); } else { Flush(D3D12TranslationLayer::COMMAND_LIST_TYPE_GRAPHICS_MASK); ThrowFailure(GetCommandQueue(COMMAND_LIST_TYPE::GRAPHICS)->Signal(pFence->Get(), Value)); pFence->UsedInCommandList(COMMAND_LIST_TYPE::GRAPHICS, GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS) - 1); } } //---------------------------------------------------------------------------------------------------------------------------------- TRANSLATION_API void 
ImmediateContext::Wait( std::shared_ptr const& pFence, UINT64 Value ) { if (pFence->DeferredWaits()) { m_ResourceStateManager.AddDeferredWait(pFence, Value); return; } Flush(D3D12TranslationLayer::COMMAND_LIST_TYPE_ALL_MASK); for (UINT listTypeIndex = 0; listTypeIndex < static_cast(COMMAND_LIST_TYPE::MAX_VALID); ++listTypeIndex) { auto pQueue = GetCommandQueue(static_cast(listTypeIndex)); if (pQueue) { ThrowFailure(pQueue->Wait(pFence->Get(), Value)); } } } //---------------------------------------------------------------------------------------------------------------------------------- unique_comptr ImmediateContext::AllocateHeap(UINT64 HeapSize, UINT64 alignment, AllocatorHeapType heapType) noexcept(false) { D3D12_HEAP_PROPERTIES Props = GetHeapProperties(GetD3D12HeapType(heapType)); D3D12_RESOURCE_DESC Desc = CD3DX12_RESOURCE_DESC::Buffer(HeapSize, D3D12_RESOURCE_FLAG_NONE, alignment); unique_comptr spResource; HRESULT hr = m_pDevice12->CreateCommittedResource( &Props, D3D12_HEAP_FLAG_NONE, &Desc, GetDefaultPoolState(heapType), nullptr, IID_PPV_ARGS(&spResource)); ThrowFailure(hr); // throw( _com_error ) // Cache the Map within the D3D12 resource. 
CD3DX12_RANGE NullRange(0, 0); void* pData = nullptr; ThrowFailure(spResource->Map(0, &NullRange, &pData)); return std::move(spResource); } //---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::ClearState() { m_CurrentState.ClearState(); m_PrimitiveTopology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; m_uNumScissors = 0; m_uNumViewports = 0; m_ScissorRectEnable = false; memset(m_BlendFactor, 0, sizeof(m_BlendFactor)); memset(m_auVertexOffsets, 0, sizeof(m_auVertexOffsets)); memset(m_auVertexStrides, 0, sizeof(m_auVertexStrides)); m_DirtyStates |= e_DirtyOnFirstCommandList; m_StatesToReassert |= e_ReassertOnNewCommandList; } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::SetMarker([[maybe_unused]] const wchar_t* name) { #ifdef USE_PIX PIXSetMarker(GetGraphicsCommandList(), 0, L"D3D11 Marker: %s", name); #endif } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::BeginEvent([[maybe_unused]] const wchar_t* name) { #ifdef USE_PIX PIXBeginEvent(GetGraphicsCommandList(), 0, L"D3D11 Event: %s", name); #endif } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::EndEvent() { #ifdef USE_PIX PIXEndEvent(GetGraphicsCommandList()); #endif } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::SharingContractPresent(_In_ Resource* pResource) { Flush(COMMAND_LIST_TYPE_GRAPHICS_MASK); auto pSharingContract = GetCommandListManager(COMMAND_LIST_TYPE::GRAPHICS)->GetSharingContract(); if (pSharingContract) { 
ID3D12Resource* pUnderlying = pResource->GetUnderlyingResource(); pSharingContract->Present(pUnderlying, 0, nullptr); } pResource->UsedInCommandList(COMMAND_LIST_TYPE::GRAPHICS, GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS)); GetCommandListManager(COMMAND_LIST_TYPE::GRAPHICS)->SetNeedSubmitFence(); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::Present( _In_reads_(uSrcSurfaces) PresentSurface const* pSrcSurfaces, UINT numSrcSurfaces, _In_opt_ Resource* pDest, UINT flipInterval, UINT vidPnSourceId, _In_ D3DKMT_PRESENT* pKMTPresent, bool bDoNotSequence, std::function pfnPresentCb) { if (bDoNotSequence) { // Blt with DoNotSequence is not supported in DX9/DX11. Supporting this would require extra tracking to // ensure defer deletion works correctly if (pDest) { ThrowFailure(E_INVALIDARG); } } else { if (!pKMTPresent->Flags.RedirectedFlip) { m_MaxFrameLatencyHelper.WaitForMaximumFrameLatency(); } PresentSurface PresentOverride; if (pDest) { assert(numSrcSurfaces == 1); Resource* pSource = pSrcSurfaces->m_pResource; if (pSource->AppDesc()->Samples() > 1) { Resource* pTemp = m_BltResolveManager.GetBltResolveTempForWindow(pKMTPresent->hWindow, *pSource); ResourceResolveSubresource(pTemp, 0, pSource, pSrcSurfaces->m_subresource, pSource->AppDesc()->Format()); PresentOverride.m_pResource = pTemp; PresentOverride.m_subresource = 0; numSrcSurfaces = 1; pSrcSurfaces = &PresentOverride; } } for (UINT i = 0; i < numSrcSurfaces; i++) { const UINT appSubresource = pSrcSurfaces[i].m_subresource; Resource* pResource = pSrcSurfaces[i].m_pResource; for (UINT iPlane = 0; iPlane < pResource->AppDesc()->NonOpaquePlaneCount(); ++iPlane) { UINT subresourceIndex = ConvertSubresourceIndexAddPlane(appSubresource, pResource->AppDesc()->SubresourcesPerPlane(), iPlane); GetResourceStateManager().TransitionSubresource(pResource, subresourceIndex, 
D3D12_RESOURCE_STATE_PRESENT, COMMAND_LIST_TYPE::GRAPHICS, SubresourceTransitionFlags::StateMatchExact); } } if (pDest) { GetResourceStateManager().TransitionResource(pDest, D3D12_RESOURCE_STATE_COPY_DEST); } GetResourceStateManager().ApplyAllResourceTransitions(); PresentCBArgs presentArgs = {}; presentArgs.pGraphicsCommandQueue = GetCommandQueue(COMMAND_LIST_TYPE::GRAPHICS); presentArgs.pGraphicsCommandList = GetCommandList(COMMAND_LIST_TYPE::GRAPHICS); presentArgs.pSrcSurfaces = pSrcSurfaces; presentArgs.numSrcSurfaces = numSrcSurfaces; presentArgs.pDest = pDest; presentArgs.flipInterval = flipInterval; presentArgs.vidPnSourceId = vidPnSourceId; presentArgs.pKMTPresent = pKMTPresent; ThrowFailure(pfnPresentCb(presentArgs)); GetCommandListManager(COMMAND_LIST_TYPE::GRAPHICS)->PrepForCommandQueueSync(); // throws } } HRESULT TRANSLATION_API ImmediateContext::CloseAndSubmitGraphicsCommandListForPresent( BOOL commandsAdded, _In_reads_(numSrcSurfaces) const PresentSurface* pSrcSurfaces, UINT numSrcSurfaces, _In_opt_ Resource* pDest, _In_ D3DKMT_PRESENT* pKMTPresent) { const auto commandListType = COMMAND_LIST_TYPE::GRAPHICS; if (commandsAdded) { AdditionalCommandsAdded(commandListType); } UINT commandListMask = D3D12TranslationLayer::COMMAND_LIST_TYPE_GRAPHICS_MASK; if (!Flush(commandListMask)) { CloseCommandList(commandListMask); ResetCommandList(commandListMask); } auto pSharingContract = GetCommandListManager(commandListType)->GetSharingContract(); if (pSharingContract) { for (UINT i = 0; i < numSrcSurfaces; ++i) { pSharingContract->Present(pSrcSurfaces[i].m_pResource->GetUnderlyingResource(), pSrcSurfaces[i].m_subresource, pKMTPresent->hWindow); } } // These must be marked after the flush so that they are defer deleted // Don't mark these for residency management as these aren't part of the next command list UINT64 CommandListID = GetCommandListID(commandListType); if (pDest) { pDest->UsedInCommandList(commandListType, CommandListID); } for (UINT i = 0; i < 
numSrcSurfaces; i++) { D3D12TranslationLayer::Resource* pResource = pSrcSurfaces[i].m_pResource; pResource->UsedInCommandList(commandListType, CommandListID); } if (!pKMTPresent->Flags.RedirectedFlip) { m_MaxFrameLatencyHelper.RecordPresentFenceValue(CommandListID); } return S_OK; } //---------------------------------------------------------------------------------------------------------------------------------- ImmediateContext::BltResolveManager::BltResolveManager(D3D12TranslationLayer::ImmediateContext& ImmCtx) : m_ImmCtx(ImmCtx) { } //---------------------------------------------------------------------------------------------------------------------------------- Resource* ImmediateContext::BltResolveManager::GetBltResolveTempForWindow(HWND hwnd, Resource& presentingResource) { auto& spTemp = m_Temps[hwnd]; auto pResourceDesc = presentingResource.Parent(); if (spTemp) { if (spTemp->AppDesc()->Format() != pResourceDesc->m_appDesc.Format() || spTemp->AppDesc()->Width() != pResourceDesc->m_appDesc.Width() || spTemp->AppDesc()->Height() != pResourceDesc->m_appDesc.Height()) { spTemp.reset(); } } if (!spTemp) { auto Desc = *pResourceDesc; Desc.m_appDesc.m_Samples = 1; Desc.m_appDesc.m_Quality = 0; Desc.m_desc12.SampleDesc.Count = 1; Desc.m_desc12.SampleDesc.Quality = 0; spTemp = Resource::CreateResource(&m_ImmCtx, Desc, ResourceAllocationContext::ImmediateContextThreadLongLived); } return spTemp.get(); } } ================================================ FILE: src/Main.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#include "pch.h"

namespace D3D12TranslationLayer
{
    // TraceLogging provider used for telemetry; stays null (telemetry disabled) until
    // SetTraceloggingProvider is called.
    TraceLoggingHProvider g_hTracelogging = nullptr;

    // Installs (or clears, when passed null) the TraceLogging provider.
    void SetTraceloggingProvider(TraceLoggingHProvider hTracelogging)
    {
        g_hTracelogging = hTracelogging;
    }
}

================================================
FILE: src/MaxFrameLatencyHelper.cpp
================================================
#include "pch.h"

namespace D3D12TranslationLayer
{
    // Associates the helper with its immediate context; must be called before any other method
    // that asserts on m_pImmediateContext.
    void MaxFrameLatencyHelper::Init(ImmediateContext* pImmCtx)
    {
        assert(pImmCtx != nullptr);
        m_pImmediateContext = pImmCtx;
    }

    // Sets the maximum number of presents allowed to be in flight.
    void MaxFrameLatencyHelper::SetMaximumFrameLatency(UINT MaxFrameLatency)
    {
        assert(m_pImmediateContext != nullptr);
        std::lock_guard Lock(m_FrameLatencyLock);
        m_MaximumFrameLatency = MaxFrameLatency;
    }

    // Returns the currently configured maximum frame latency.
    UINT MaxFrameLatencyHelper::GetMaximumFrameLatency()
    {
        std::lock_guard Lock(m_FrameLatencyLock);
        return m_MaximumFrameLatency;
    }

    // Drops recorded present fence values that the graphics queue has already completed, then
    // reports whether the number still outstanding is at or above the maximum frame latency.
    bool MaxFrameLatencyHelper::IsMaximumFrameLatencyReached()
    {
        assert(m_pImmediateContext != nullptr);
        std::lock_guard Lock(m_FrameLatencyLock);
        UINT64 CompletedFenceValue = m_pImmediateContext->GetCompletedFenceValue(COMMAND_LIST_TYPE::GRAPHICS);
        while (m_PresentFenceValuesBegin != m_PresentFenceValuesEnd &&
               *m_PresentFenceValuesBegin <= CompletedFenceValue)
        {
            ++m_PresentFenceValuesBegin;
        }
        return std::distance(m_PresentFenceValuesBegin, m_PresentFenceValuesEnd) >= (ptrdiff_t)m_MaximumFrameLatency;
    }

    // Blocks until fewer than the maximum number of presents are outstanding.
    // NOTE(review): the lock is held while calling IsMaximumFrameLatencyReached, which locks again;
    // this presumably relies on m_FrameLatencyLock being a recursive mutex - confirm in the header.
    void MaxFrameLatencyHelper::WaitForMaximumFrameLatency()
    {
        assert(m_pImmediateContext != nullptr);
        std::lock_guard Lock(m_FrameLatencyLock);

        // Looping, because max frame latency can be dropped, and we may
        // need to wait for multiple presents to complete here.
        while (IsMaximumFrameLatencyReached())
        {
            m_pImmediateContext->WaitForFenceValue(COMMAND_LIST_TYPE::GRAPHICS, *m_PresentFenceValuesBegin);
        }
    }

    // Appends the fence value of a just-submitted present to the outstanding list.
    void MaxFrameLatencyHelper::RecordPresentFenceValue(UINT64 fenceValue)
    {
        std::lock_guard Lock(m_FrameLatencyLock);
        *m_PresentFenceValuesEnd = fenceValue;
        ++m_PresentFenceValuesEnd;
    }
}

================================================
FILE: src/PipelineState.cpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include "pch.h"

namespace D3D12TranslationLayer
{
    // Builds a graphics pipeline state. The root signature is created or retrieved from the shader
    // set, the description is copied (including deep copies of the input-layout, stream-output
    // declaration and stride arrays so the caller's arrays need not outlive this object), and PSO
    // creation is started via Create. May throw bad_alloc or _com_error.
    PipelineState::PipelineState(ImmediateContext *pContext, const GRAPHICS_PIPELINE_STATE_DESC &desc)
        : DeviceChildImpl(pContext)
        , m_PipelineStateType(e_Draw)
        , m_pRootSignature(pContext->CreateOrRetrieveRootSignature(
            RootSignatureDesc(desc.pVertexShader, desc.pPixelShader, desc.pGeometryShader, desc.pHullShader, desc.pDomainShader, pContext->RequiresBufferOutofBoundsHandling())))
    {
        Graphics.m_Desc = desc;
        Graphics.m_Desc.pRootSignature = m_pRootSignature->GetForImmediateUse();
        Graphics.pVertexShader = desc.pVertexShader;
        Graphics.pGeometryShader = desc.pGeometryShader;
        Graphics.pPixelShader = desc.pPixelShader;
        Graphics.pDomainShader = desc.pDomainShader;
        Graphics.pHullShader = desc.pHullShader;

        // Perform deep copy of embedded arrays
        // Note: decls containing strings reference their strings from shader bytecode and do not need to deep copy them here
        if (desc.InputLayout.NumElements)
        {
            spInputElements.reset(new D3D12_INPUT_ELEMENT_DESC[desc.InputLayout.NumElements]); // throw( bad_alloc )
            memcpy(spInputElements.get(), desc.InputLayout.pInputElementDescs, sizeof(spInputElements[0]) * desc.InputLayout.NumElements);
            Graphics.m_Desc.InputLayout.pInputElementDescs = spInputElements.get();
        }
        if (desc.StreamOutput.NumEntries)
        {
            spSODecls.reset(new D3D12_SO_DECLARATION_ENTRY[desc.StreamOutput.NumEntries]); // throw( bad_alloc )
            memcpy(spSODecls.get(), desc.StreamOutput.pSODeclaration, sizeof(spSODecls[0]) * desc.StreamOutput.NumEntries);
            Graphics.m_Desc.StreamOutput.pSODeclaration = spSODecls.get();
        }
        if (desc.StreamOutput.pBufferStrides)
        {
            // NOTE(review): assumes NumStrides never exceeds the capacity of SOStrides - confirm against its declaration.
            memcpy(SOStrides, desc.StreamOutput.pBufferStrides, sizeof(UINT) * desc.StreamOutput.NumStrides);
            Graphics.m_Desc.StreamOutput.pBufferStrides = SOStrides;
        }

        Create<e_Draw>();
    }

    // Builds a compute pipeline state from the compute shader's root signature and starts PSO creation.
    PipelineState::PipelineState(ImmediateContext *pContext, const COMPUTE_PIPELINE_STATE_DESC &desc)
        : DeviceChildImpl(pContext)
        , m_PipelineStateType(e_Dispatch)
        , m_pRootSignature(pContext->CreateOrRetrieveRootSignature(
            RootSignatureDesc(desc.pCompute, pContext->RequiresBufferOutofBoundsHandling())))
    {
        Compute.m_Desc = desc;
        Compute.m_Desc.pRootSignature = m_pRootSignature->GetForImmediateUse();
        Compute.pComputeShader = desc.pCompute;

        Create<e_Dispatch>();
    }

    // Unbinds this PSO from the context if it is the one currently bound, so the context never
    // keeps a pointer to a destroyed pipeline state.
    PipelineState::~PipelineState()
    {
        if (m_pParent->GetPipelineState() == this)
        {
            m_pParent->SetPipelineState(nullptr);
        }
    }

    // Maps a pipeline type to the matching ID3D12Device creation method and description accessor.
    // NOTE(review): the template parameter lists were lost in this copy of the file;
    // `EPipelineType` is assumed to be the enum that declares e_Draw / e_Dispatch - confirm.
    template<EPipelineType Type> struct PSOTraits;

    template<> struct PSOTraits<e_Draw>
    {
        static decltype(&ID3D12Device::CreateGraphicsPipelineState) GetCreate() { return &ID3D12Device::CreateGraphicsPipelineState; }
        static const D3D12_GRAPHICS_PIPELINE_STATE_DESC &GetDesc(PipelineState &p) { return p.GetGraphicsDesc(); }
    };

    template<> struct PSOTraits<e_Dispatch>
    {
        static decltype(&ID3D12Device::CreateComputePipelineState) GetCreate() { return &ID3D12Device::CreateComputePipelineState; }
        static const D3D12_COMPUTE_PIPELINE_STATE_DESC &GetDesc(PipelineState &p) { return p.GetComputeDesc(); }
    };

    // Creates the D3D12 PSO, on the PSO compilation thread pool when the context has one (creation
    // failures are swallowed there; _com_error must not escape a worker thread), otherwise inline
    // on the calling thread, where failures propagate as _com_error.
    template<EPipelineType Type>
    inline void PipelineState::Create()
    {
        if (m_pParent->m_spPSOCompilationThreadPool)
        {
            m_pParent->m_spPSOCompilationThreadPool->QueueThreadpoolWork(m_ThreadpoolWork,
                [this]()
            {
                try
                {
                    CreateImpl<Type>();
                }
                catch (_com_error&) {}
            });
        }
        else
        {
            CreateImpl<Type>();
        }
    }

    // Performs the actual PSO creation. Failures are reported to telemetry (when a provider is set)
    // and then thrown as _com_error. When the context requests round-trip PSOs, the cached blob of
    // the freshly created PSO is fed back into a second creation call.
    template<EPipelineType Type>
    inline void PipelineState::CreateImpl()
    {
        using Traits = PSOTraits<Type>;

        HRESULT hr = (m_pParent->m_pDevice12.get()->*Traits::GetCreate())(&Traits::GetDesc(*this), IID_PPV_ARGS(GetForCreate()));
        if (FAILED(hr))
        {
            MICROSOFT_TELEMETRY_ASSERT(hr != E_INVALIDARG);
            if (g_hTracelogging)
            {
                TraceLoggingWrite(g_hTracelogging,
                    "PSOCreationFailure",
                    TraceLoggingInt32(0, "SchemaVersion"),
                    TraceLoggingHResult(hr, "HResult"),
                    TraceLoggingInt32(Type, "PSOType"),
                    TraceLoggingKeyword(MICROSOFT_KEYWORD_MEASURES),
                    TraceLoggingLevel(TRACE_LEVEL_ERROR));
            }
        }
        ThrowFailure(hr); // throw( _com_error )

        if (m_pParent->UseRoundTripPSOs())
        {
            // Writes the cached-blob fields of whichever description this pipeline type uses.
            auto SetCachedBlob = [this](const void* pBlob, SIZE_T blobSize)
            {
                if constexpr (Type == e_Draw)
                {
                    Graphics.m_Desc.CachedPSO.pCachedBlob = pBlob;
                    Graphics.m_Desc.CachedPSO.CachedBlobSizeInBytes = blobSize;
                }
                else
                {
                    Compute.m_Desc.CachedPSO.pCachedBlob = pBlob;
                    Compute.m_Desc.CachedPSO.CachedBlobSizeInBytes = blobSize;
                }
            };

            CComPtr<ID3DBlob> spBlob;
            ThrowFailure(GetForImmediateUse()->GetCachedBlob(&spBlob)); // throw( _com_error )

            SetCachedBlob(spBlob->GetBufferPointer(), spBlob->GetBufferSize());
            hr = (m_pParent->m_pDevice12.get()->*Traits::GetCreate())(&Traits::GetDesc(*this), IID_PPV_ARGS(GetForCreate()));

            // spBlob is released when this scope exits; clear the description first so it never
            // holds a dangling pointer into the freed blob (also on the failure path).
            SetCachedBlob(nullptr, 0);
            ThrowFailure(hr); // throw( _com_error )
        }
    }
}

================================================
FILE: src/Query.cpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include "pch.h"

namespace D3D12TranslationLayer
{
    //==================================================================================================================================
    // Async/query/predicate/counter
    //==================================================================================================================================

    // Creates an async object in the Ended state. The requested command list mask is narrowed to
    // the list types the context reports as valid for this query type.
    Async::Async(ImmediateContext* pDevice, EQueryType Type, UINT CommandListTypeMask) noexcept
        : DeviceChild(pDevice)
        , m_Type(Type)
        , m_CommandListTypeMask(CommandListTypeMask & pDevice->GetCommandListTypeMaskForQuery(Type))
        , m_CurrentState(AsyncState::Ended)
    {
        ZeroMemory(m_EndedCommandListID, sizeof(m_EndedCommandListID));
    }

    // A query destroyed while still begun must unlink itself from the active query list.
    Async::~Async()
    {
        if (m_CurrentState != AsyncState::Begun)
        {
            return;
        }

        // Remove this query from the list of active queries
        D3D12TranslationLayer::RemoveEntryList(&m_ActiveQueryListEntry);
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Starts the query. A query that is already begun is ended first, so Begin always restarts.
    void Async::Begin() noexcept
    {
        const bool alreadyBegun = (m_CurrentState == AsyncState::Begun);
        if (alreadyBegun)
        {
            End();
        }

        assert(m_CurrentState == AsyncState::Ended);

        BeginInternal(true);
        m_CurrentState = AsyncState::Begun;

        // Add this query to the list of active queries
        D3D12TranslationLayer::InsertTailList(&m_pParent->m_ActiveQueryList, &m_ActiveQueryListEntry);
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Event and timestamp queries are end-only; every other type needs a Begin before End.
    bool Async::RequiresBegin(EQueryType type) noexcept
    {
        const bool endOnly = (type == e_QUERY_EVENT) || (type == e_QUERY_TIMESTAMP);
        return !endOnly;
    }

    // Instance convenience over the static overload.
    bool Async::RequiresBegin() const noexcept
    {
        return RequiresBegin(m_Type);
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Ends the query and records, per participating command list type, the command list ID that
    // holds the end. If the query needs a Begin and has not been begun, one is inserted first.
    void Async::End() noexcept
    {
        // Insert a begin for those query types that require it
        if (m_CurrentState == AsyncState::Ended && RequiresBegin())
        {
            Begin();
            assert(m_CurrentState == AsyncState::Begun);
        }

        if (m_CurrentState == AsyncState::Begun)
        {
            // Remove this query from the list of active queries
            D3D12TranslationLayer::RemoveEntryList(&m_ActiveQueryListEntry);
        }

        EndInternal();
        m_CurrentState = AsyncState::Ended;

        ZeroMemory(m_EndedCommandListID, sizeof(m_EndedCommandListID));
        for (UINT typeIndex = 0; typeIndex < (UINT)COMMAND_LIST_TYPE::MAX_VALID; typeIndex++)
        {
            const bool participates = (m_CommandListTypeMask & (1 << typeIndex)) != 0;
            if (!participates)
            {
                continue;
            }
            m_EndedCommandListID[typeIndex] = m_pParent->GetCommandListIDWithCommands((COMMAND_LIST_TYPE)typeIndex);
        }
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Retrieves the query result into pData.
    //   AsyncGetData - when true, the flush/completion check is skipped.
    // Returns false when the result is not ready; otherwise true. Nothing is copied when pData is
    // null or DataSize is 0.
    bool Async::GetData(void* pData, UINT DataSize, bool DoNotFlush, bool AsyncGetData) noexcept
    {
        if (!AsyncGetData)
        {
            if (!FlushAndPrep(DoNotFlush))
            {
                return false;
            }
        }

        const bool wantsData = (pData != nullptr) && (DataSize != 0);
        if (wantsData)
        {
            GetDataInternal(pData, DataSize);
        }
        return true;
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Ensures the query has ended and its work has been submitted, then reports whether the GPU has
    // completed it on every participating command list type.
    //   DoNotFlush - when the end still sits in an unsubmitted command list, return false
    //                immediately instead of submitting it.
    bool Async::FlushAndPrep(bool DoNotFlush) noexcept
    {
        if (m_CurrentState == AsyncState::Begun)
        {
            End();
        }
        assert(m_CurrentState == AsyncState::Ended);

        bool allComplete = true;
        for (UINT typeIndex = 0; typeIndex < (UINT)COMMAND_LIST_TYPE::MAX_VALID; typeIndex++)
        {
            if (!(m_CommandListTypeMask & (1 << typeIndex)))
            {
                continue;
            }

            const COMMAND_LIST_TYPE commandListType = (COMMAND_LIST_TYPE)typeIndex;
            const bool endNotYetSubmitted = (m_EndedCommandListID[typeIndex] == m_pParent->GetCommandListID(commandListType));
            if (endNotYetSubmitted)
            {
                if (DoNotFlush)
                {
                    return false;
                }

                // convert exceptions to bool result as method is noexcept and SubmitCommandList throws
                try
                {
                    m_pParent->SubmitCommandList(commandListType); // throws
                }
                catch (_com_error&)
                {
                    allComplete = false;
                }
                catch (std::bad_alloc&)
                {
                    allComplete = false;
                }
            }

            const UINT64 completedFence = m_pParent->GetCompletedFenceValue(commandListType);
            if (completedFence < m_EndedCommandListID[typeIndex])
            {
                allComplete = false;
            }
        }
        return allComplete;
    }
//---------------------------------------------------------------------------------------------------------------------------------- D3D12_QUERY_TYPE Query::GetType12() const { switch (m_Type) { case e_QUERY_TIMESTAMP: return D3D12_QUERY_TYPE_TIMESTAMP; case e_QUERY_OCCLUSION: return D3D12_QUERY_TYPE_OCCLUSION; case e_QUERY_PIPELINESTATS: return D3D12_QUERY_TYPE_PIPELINE_STATISTICS; case e_QUERY_STREAMOVERFLOWPREDICATE: // Addition is used to target the other streams and sum return D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0; case e_QUERY_STREAMOUTPUTSTATS: // Aliased to stream0 case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM0: case e_QUERY_STREAMOUTPUTSTATS_STREAM0: return D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0; case e_QUERY_STREAMOUTPUTSTATS_STREAM1: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM1: return D3D12_QUERY_TYPE_SO_STATISTICS_STREAM1; case e_QUERY_STREAMOUTPUTSTATS_STREAM2: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM2: return D3D12_QUERY_TYPE_SO_STATISTICS_STREAM2; case e_QUERY_STREAMOUTPUTSTATS_STREAM3: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM3: return D3D12_QUERY_TYPE_SO_STATISTICS_STREAM3; // Either Occlusion or BinaryOcclusion could be used // BinaryOcclusion is used for 2 reasons: // 1. To enable test coverage of BinaryOcclusion via wgf11 tests and 11on12 // 2. 
It is more efficient on some GPUs case e_QUERY_OCCLUSIONPREDICATE: return D3D12_QUERY_TYPE_BINARY_OCCLUSION; case e_QUERY_VIDEO_DECODE_STATISTICS: return D3D12_QUERY_TYPE_VIDEO_DECODE_STATISTICS; default: assert(false); return static_cast(-1); } } //---------------------------------------------------------------------------------------------------------------------------------- D3D12_QUERY_HEAP_TYPE Query::GetHeapType12() const { switch (m_Type) { case e_QUERY_TIMESTAMP: return D3D12_QUERY_HEAP_TYPE_TIMESTAMP; case e_QUERY_OCCLUSIONPREDICATE: case e_QUERY_OCCLUSION: return D3D12_QUERY_HEAP_TYPE_OCCLUSION; case e_QUERY_PIPELINESTATS: return D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS; case e_QUERY_STREAMOUTPUTSTATS: case e_QUERY_STREAMOVERFLOWPREDICATE: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM0: case e_QUERY_STREAMOUTPUTSTATS_STREAM0: case e_QUERY_STREAMOUTPUTSTATS_STREAM1: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM1: case e_QUERY_STREAMOUTPUTSTATS_STREAM2: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM2: case e_QUERY_STREAMOUTPUTSTATS_STREAM3: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM3: return D3D12_QUERY_HEAP_TYPE_SO_STATISTICS; case e_QUERY_VIDEO_DECODE_STATISTICS: return D3D12_QUERY_HEAP_TYPE_VIDEO_DECODE_STATISTICS; default: assert(false); return static_cast(-1); } } //---------------------------------------------------------------------------------------------------------------------------------- Query::~Query() { for (auto& obj : m_spQueryHeap) { AddToDeferredDeletionQueue(obj); } for (auto& obj : m_spResultBuffer) { if (obj.IsInitialized()) { m_pParent->ReleaseSuballocatedHeap(AllocatorHeapType::Readback, obj, m_LastUsedCommandListID); } } for (auto& obj : m_spPredicationBuffer) { AddToDeferredDeletionQueue(obj); } } //---------------------------------------------------------------------------------------------------------------------------------- UINT Query::GetNumSubQueries() const { switch (m_Type) { // The D3D10 stream-output predicate is unique in that 
it sums // the results from all 4 streams case e_QUERY_STREAMOVERFLOWPREDICATE: return D3D11_SO_STREAM_COUNT; default: return 1; } } //---------------------------------------------------------------------------------------------------------------------------------- void Query::Initialize() noexcept(false) { // GetNumSubQueries() is > 1 for stream-output queries where 11on12 must accumulate the results from all 4 streams // m_InstancesPerQuery is a constant multiplier for all queries. A new instance is used each time that Suspend/Resume are called D3D12_QUERY_HEAP_DESC QueryHeapDesc = { GetHeapType12(), GetNumSubQueries() * m_InstancesPerQuery, m_pParent->GetNodeMask() }; UINT BufferSize = GetDataSize12() * QueryHeapDesc.Count; // The only query types that allows non-graphics command list type are TIMESTAMP and VIDEO_STATS for now. assert(m_Type == e_QUERY_TIMESTAMP || m_Type == e_QUERY_VIDEO_DECODE_STATISTICS || m_CommandListTypeMask == COMMAND_LIST_TYPE_GRAPHICS_MASK); for (UINT listType = 0; listType < (UINT)COMMAND_LIST_TYPE::MAX_VALID; listType++) { if (!(m_CommandListTypeMask & (1 << listType))) { continue; } HRESULT hr = m_pParent->m_pDevice12->CreateQueryHeap( &QueryHeapDesc, IID_PPV_ARGS(&m_spQueryHeap[listType]) ); ThrowFailure(hr); // throw( _com_error ) // Query data goes into a readback heap for CPU readback in GetData { m_spResultBuffer[listType] = m_pParent->AcquireSuballocatedHeap( AllocatorHeapType::Readback, BufferSize, ResourceAllocationContext::FreeThread); // throw( _com_error ) } // For predicates, also create a predication buffer { bool IsPredicate = false; switch (m_Type) { case e_QUERY_OCCLUSION: case e_QUERY_TIMESTAMP: case e_QUERY_PIPELINESTATS: case e_QUERY_STREAMOUTPUTSTATS: case e_QUERY_STREAMOUTPUTSTATS_STREAM0: case e_QUERY_STREAMOUTPUTSTATS_STREAM1: case e_QUERY_STREAMOUTPUTSTATS_STREAM2: case e_QUERY_STREAMOUTPUTSTATS_STREAM3: case e_QUERY_VIDEO_DECODE_STATISTICS: IsPredicate = false; break; // Here D3D12 has a BOOL encoded in 
64-bits case e_QUERY_OCCLUSIONPREDICATE: case e_QUERY_STREAMOVERFLOWPREDICATE: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM0: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM1: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM2: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM3: IsPredicate = true; break; default: assert(false); } if (IsPredicate) { D3D12_HEAP_PROPERTIES HeapProp = m_pParent->GetHeapProperties( D3D12_HEAP_TYPE_DEFAULT ); D3D12_RESOURCE_DESC ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer( BufferSize, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS ); // D3D12_RESOURCE_STATE_PREDICATION is the required state for SetPredication hr = m_pParent->m_pDevice12->CreateCommittedResource( &HeapProp, D3D12_HEAP_FLAG_NONE, &ResourceDesc, D3D12_RESOURCE_STATE_PREDICATION, nullptr, IID_PPV_ARGS(&m_spPredicationBuffer[listType]) ); ThrowFailure(hr); // throw( _com_error ) } } } m_CurrentInstance = 0; } //---------------------------------------------------------------------------------------------------------------------------------- void Query::Suspend() noexcept { assert(m_CurrentInstance < m_InstancesPerQuery); // Store data in the query object, then resolve into the result buffer UINT DataSize12 = GetDataSize12(); UINT NumSubQueries = GetNumSubQueries(); D3D12_QUERY_TYPE QueryType12 = GetType12(); auto DoEndQuery = [&](auto pIface, COMMAND_LIST_TYPE commandListType, UINT subQuery) { UINT Index = QueryIndex(m_CurrentInstance, subQuery, NumSubQueries); pIface->EndQuery( m_spQueryHeap[(UINT)commandListType].get(), static_cast(QueryType12 + subQuery), Index ); pIface->ResolveQueryData( m_spQueryHeap[(UINT)commandListType].get(), static_cast(QueryType12 + subQuery), Index, 1, m_spResultBuffer[(UINT)commandListType].GetResource(), Index * DataSize12 + m_spResultBuffer[(UINT)commandListType].GetOffset() ); }; for (UINT listType = 0; listType < (UINT)COMMAND_LIST_TYPE::MAX_VALID; listType++) { if (m_CommandListTypeMask & (1 << listType)) { COMMAND_LIST_TYPE commandListType = 
(COMMAND_LIST_TYPE)listType; m_pParent->PreRender(commandListType); for (UINT subquery = 0; subquery < NumSubQueries; subquery++) { static_assert(static_cast(COMMAND_LIST_TYPE::MAX_VALID) == 3u, "ImmediateContext::DiscardView must support all command list types."); switch (commandListType) { case COMMAND_LIST_TYPE::GRAPHICS: DoEndQuery(m_pParent->GetGraphicsCommandList(), commandListType, subquery); break; case COMMAND_LIST_TYPE::VIDEO_DECODE: DoEndQuery(m_pParent->GetVideoDecodeCommandList(), commandListType, subquery); break; case COMMAND_LIST_TYPE::VIDEO_PROCESS: DoEndQuery(m_pParent->GetVideoProcessCommandList(), commandListType, subquery); break; } } m_pParent->AdditionalCommandsAdded(commandListType); m_LastUsedCommandListID[listType] = m_pParent->GetCommandListID(commandListType); } } } //---------------------------------------------------------------------------------------------------------------------------------- void Query::Resume() noexcept { BeginInternal(false); } //---------------------------------------------------------------------------------------------------------------------------------- void Query::BeginInternal(bool restart) noexcept { // These query types do not support begin assert(m_Type != e_QUERY_EVENT); assert(m_Type != e_QUERY_TIMESTAMP); assert(m_Type != e_QUERY_VIDEO_DECODE_STATISTICS); UINT NumSubQueries = GetNumSubQueries(); D3D12_QUERY_TYPE QueryType12 = GetType12(); static_assert(D3D12_QUERY_TYPE_SO_STATISTICS_STREAM1 == (D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0 + 1), "Adding enum"); static_assert(D3D12_QUERY_TYPE_SO_STATISTICS_STREAM2 == (D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0 + 2), "Adding enum"); static_assert(D3D12_QUERY_TYPE_SO_STATISTICS_STREAM3 == (D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0 + 3), "Adding enum"); if (restart) { // Restart at the first instance m_CurrentInstance = 0; } else { assert(m_CurrentInstance < m_InstancesPerQuery); // Increment m_CurrentInstance, accumulating if c_InstancesPerQuery would be exceeded 
AdvanceInstance(); } auto DoBeginQuery = [&](auto pIface, COMMAND_LIST_TYPE commandListType, UINT subQuery) { pIface->BeginQuery( m_spQueryHeap[(UINT)commandListType].get(), static_cast(QueryType12 + subQuery), QueryIndex(m_CurrentInstance, subQuery, NumSubQueries) ); }; for (UINT listType = 0; listType < (UINT)COMMAND_LIST_TYPE::MAX_VALID; listType++) { if (m_CommandListTypeMask & (1 << listType)) { COMMAND_LIST_TYPE commandListType = (COMMAND_LIST_TYPE)listType; m_pParent->PreRender(commandListType); for (UINT subquery = 0; subquery < NumSubQueries; subquery++) { static_assert(static_cast(COMMAND_LIST_TYPE::MAX_VALID) == 3u, "Query::BeginInternal must support all command list types."); switch (commandListType) { case COMMAND_LIST_TYPE::GRAPHICS: DoBeginQuery(m_pParent->GetGraphicsCommandList(), commandListType, subquery); break; case COMMAND_LIST_TYPE::VIDEO_DECODE: DoBeginQuery(m_pParent->GetVideoDecodeCommandList(), commandListType, subquery); break; case COMMAND_LIST_TYPE::VIDEO_PROCESS: DoBeginQuery(m_pParent->GetVideoProcessCommandList(), commandListType, subquery); break; } } m_LastUsedCommandListID[(UINT)commandListType] = m_pParent->GetCommandListID(commandListType); } } } //---------------------------------------------------------------------------------------------------------------------------------- void Query::EndInternal() noexcept { // These queries do not have begin/end // They only have end // So they only ever use the first instance if ( e_QUERY_TIMESTAMP == m_Type || e_QUERY_VIDEO_DECODE_STATISTICS == m_Type) { m_CurrentInstance = 0; } assert(m_CurrentInstance < m_InstancesPerQuery); // This type also does not support Begin/End // But it should go through a different EndInternal method assert(e_QUERY_EVENT != m_Type); // Write data for current instance into the result buffer Suspend(); m_CurrentInstance++; assert(m_CurrentInstance <= m_InstancesPerQuery); } 
//---------------------------------------------------------------------------------------------------------------------------------- void Query::GetInstanceData(_Out_writes_bytes_(DataSize) void* pData, UINT DataSize, UINT InstanceIndex) noexcept { assert(m_CurrentInstance <= m_InstancesPerQuery); if (m_Accumulate) { ThrowFailure(E_INVALIDARG); } if (InstanceIndex >= m_InstancesPerQuery) { ThrowFailure(E_INVALIDARG); } for (UINT listType = 0; listType < (UINT)COMMAND_LIST_TYPE::MAX_VALID; listType++) { if (!(m_CommandListTypeMask & (1 << listType))) { continue; } void* pMappedData = nullptr; assert(DataSize == GetDataSize12()); CD3DX12_RANGE ReadRange(DataSize * InstanceIndex, DataSize * (InstanceIndex + 1)); HRESULT hr = m_spResultBuffer[listType].Map( 0, &ReadRange, &pMappedData ); ThrowFailure(hr); switch (m_Type) { case e_QUERY_VIDEO_DECODE_STATISTICS: memcpy(pData, pMappedData, DataSize); break; default: ThrowFailure(E_UNEXPECTED); break; } CD3DX12_RANGE WrittenRange(0, 0); m_spResultBuffer[listType].Unmap(0, &WrittenRange); } } //---------------------------------------------------------------------------------------------------------------------------------- void Query::GetDataInternal(_Out_writes_bytes_(DataSize) void* pData, UINT DataSize) noexcept { assert(m_CurrentInstance <= m_InstancesPerQuery); if (!m_Accumulate) { ThrowFailure(E_UNEXPECTED); } // initialize queries that can be used in multiple command lists if (m_Type == e_QUERY_TIMESTAMP) { if (DataSize < sizeof(UINT64)) { ThrowFailure(E_INVALIDARG); } UINT64 *pDest = reinterpret_cast(pData); *pDest = 0; } for (UINT listType = 0; listType < (UINT)COMMAND_LIST_TYPE::MAX_VALID; listType++) { if (!(m_CommandListTypeMask & (1 << listType))) { continue; } if (m_Type != e_QUERY_TIMESTAMP && m_Type != e_QUERY_TIMESTAMPDISJOINT) { m_pParent->GetCommandListManager((COMMAND_LIST_TYPE)listType)->ReadbackInitiated(); } void* pMappedData = nullptr; CD3DX12_RANGE ReadRange(0, DataSize); HRESULT hr = 
m_spResultBuffer[listType].Map( 0, &ReadRange, &pMappedData ); ThrowFailure(hr); UINT DataSize12 = GetDataSize12(); UINT NumSubQueries = GetNumSubQueries(); // All structures are arrays of 64-bit values assert(0 == (DataSize12 % sizeof(UINT64))); UINT NumCounters = DataSize12 / sizeof(UINT64); UINT64 TempBuffer[12]; static_assert(sizeof(TempBuffer) >= sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS), "Temporary query buffer no large enough."); static_assert(sizeof(TempBuffer) >= sizeof(D3D12_QUERY_DATA_SO_STATISTICS), "Temporary query buffer no large enough."); assert(sizeof(TempBuffer) >= DataSize12); assert(_countof(TempBuffer) >= NumCounters); // Accumulate all instances & subqueries into a single value // If the query was never issued, then 0 will be returned ZeroMemory(TempBuffer, sizeof(TempBuffer)); const UINT64* pSrc = reinterpret_cast(pMappedData); for (UINT Instance = 0; Instance < m_CurrentInstance; Instance++) { for (UINT SubQuery = 0; SubQuery < NumSubQueries; SubQuery++) { for (UINT Counter = 0; Counter < NumCounters; Counter++) { TempBuffer[Counter] += pSrc[0]; pSrc++; } } } switch (m_Type) { // 11 and 12 match, need to interpret & merge values from possibly multiple queues case e_QUERY_TIMESTAMP: { UINT64 Timestamp = (UINT64)TempBuffer[0]; if (Timestamp > *(UINT64 *)pData) { *(UINT64 *)pData = Timestamp; } break; } // For these types, 11 and 12 match case e_QUERY_OCCLUSION: case e_QUERY_PIPELINESTATS: case e_QUERY_STREAMOUTPUTSTATS: case e_QUERY_STREAMOUTPUTSTATS_STREAM0: case e_QUERY_STREAMOUTPUTSTATS_STREAM1: case e_QUERY_STREAMOUTPUTSTATS_STREAM2: case e_QUERY_STREAMOUTPUTSTATS_STREAM3: assert(DataSize == GetDataSize12()); memcpy(pData, TempBuffer, DataSize); break; // Here D3D12 has a BOOL encoded in 64-bits case e_QUERY_OCCLUSIONPREDICATE: { __analysis_assume(DataSize == sizeof(BOOL)); assert(DataSize == sizeof(BOOL)); BOOL Result = (TempBuffer[0] != 0) ? 
TRUE : FALSE; memcpy(pData, &Result, sizeof(BOOL)); } break; case e_QUERY_STREAMOVERFLOWPREDICATE: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM0: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM1: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM2: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM3: { __analysis_assume(DataSize == sizeof(BOOL)); assert(DataSize == sizeof(BOOL)); BOOL Result = (TempBuffer[1] != TempBuffer[0]) ? TRUE : FALSE; memcpy(pData, &Result, sizeof(BOOL)); } break; default: { assert(false); } break; } CD3DX12_RANGE WrittenRange(0, 0); m_spResultBuffer[listType].Unmap(0, &WrittenRange); } } //---------------------------------------------------------------------------------------------------------------------------------- UINT Query::GetDataSize12() const { // This returns the size of the data written by ResolveQueryData UINT Result = 0; switch (m_Type) { case e_QUERY_OCCLUSIONPREDICATE: case e_QUERY_OCCLUSION: case e_QUERY_TIMESTAMP: Result = sizeof(UINT64); break; case e_QUERY_PIPELINESTATS: Result = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS); break; case e_QUERY_STREAMOUTPUTSTATS: case e_QUERY_STREAMOUTPUTSTATS_STREAM0: case e_QUERY_STREAMOUTPUTSTATS_STREAM1: case e_QUERY_STREAMOUTPUTSTATS_STREAM2: case e_QUERY_STREAMOUTPUTSTATS_STREAM3: case e_QUERY_STREAMOVERFLOWPREDICATE: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM0: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM1: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM2: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM3: Result = sizeof(D3D12_QUERY_DATA_SO_STATISTICS); break; case e_QUERY_VIDEO_DECODE_STATISTICS: Result = sizeof(D3D12_QUERY_DATA_VIDEO_DECODE_STATISTICS); break; default: assert(false); } return Result; } //---------------------------------------------------------------------------------------------------------------------------------- void Query::AdvanceInstance() { // Used during Resume or AutoAdvance to move to the next instance assert(m_CurrentInstance < m_InstancesPerQuery); if ((m_CurrentInstance + 1) < 
m_InstancesPerQuery) { m_CurrentInstance++; } else if (!m_Accumulate) { // just wrap in this case and move on overwriting old entries m_CurrentInstance = 0; } else { // Out of instances // Wait for the GPU to finish all outstanding work m_pParent->WaitForCompletion(m_CommandListTypeMask); // Accumulate all results into Instance0 void* pMappedData = nullptr; UINT DataSize12 = GetDataSize12(); UINT NumSubQueries = GetNumSubQueries(); CD3DX12_RANGE ReadRange(0, DataSize12 * NumSubQueries * m_InstancesPerQuery); for (UINT listType = 0; listType < (UINT)COMMAND_LIST_TYPE::MAX_VALID; listType++) { if (!(m_CommandListTypeMask & (1 << listType))) { continue; } ThrowFailure(m_spResultBuffer[listType].Map(0, &ReadRange, &pMappedData)); // All structures are arrays of 64-bit values assert(0 == (DataSize12 % sizeof(UINT64))); UINT NumCountersPerSubQuery = DataSize12 / sizeof(UINT64); UINT NumCountersPerInstance = NumCountersPerSubQuery * NumSubQueries; UINT64* pInstance0 = reinterpret_cast(pMappedData); for (UINT Instance = 1; Instance <= m_CurrentInstance; Instance++) { const UINT64* pInstance = reinterpret_cast(pMappedData) + (NumCountersPerInstance * Instance); for (UINT i = 0; i < NumCountersPerInstance; i++) { pInstance0[i] += pInstance[i]; } } CD3DX12_RANGE WrittenRange(0, DataSize12 * NumSubQueries); m_spResultBuffer[listType].Unmap(0, &WrittenRange); } // Instance0 has valid data. 
11on12 can re-use the data for instance1 and beyond m_CurrentInstance = 1; } assert(m_CurrentInstance < m_InstancesPerQuery); } //---------------------------------------------------------------------------------------------------------------------------------- UINT Query::QueryIndex(UINT Instance, UINT SubQuery, UINT NumSubQueries) { return (Instance * NumSubQueries) + SubQuery; } //---------------------------------------------------------------------------------------------------------------------------------- // Copies data into m_spPredicationBuffer and formats it with a compute shader if necessary void Query::FillPredicationBuffer() { #ifdef USE_PIX PIXSetMarker(m_pParent->GetGraphicsCommandList(), 0ull, L"Transform query data for predication"); #endif // This buffer is created when the query is created assert(m_spPredicationBuffer[(UINT)COMMAND_LIST_TYPE::GRAPHICS]); assert(m_CommandListTypeMask == COMMAND_LIST_TYPE_GRAPHICS_MASK); // Copy from the result buffer to the predication buffer { // Transition the result buffer to the CopySource state AutoTransition AutoTransition1( m_pParent->GetGraphicsCommandList(), m_spResultBuffer[(UINT)COMMAND_LIST_TYPE::GRAPHICS].GetResource(), D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE ); // Transition the predication buffer to the CopyDest state AutoTransition AutoTransition2( m_pParent->GetGraphicsCommandList(), m_spPredicationBuffer[(UINT)COMMAND_LIST_TYPE::GRAPHICS].get(), D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, D3D12_RESOURCE_STATE_PREDICATION, D3D12_RESOURCE_STATE_COPY_DEST ); UINT BufferSize = GetDataSize12() * GetNumSubQueries() * m_InstancesPerQuery; m_pParent->GetGraphicsCommandList()->CopyBufferRegion( m_spPredicationBuffer[(UINT)COMMAND_LIST_TYPE::GRAPHICS].get(), 0, m_spResultBuffer[(UINT)COMMAND_LIST_TYPE::GRAPHICS].GetResource(), m_spResultBuffer[(UINT)COMMAND_LIST_TYPE::GRAPHICS].GetOffset(), BufferSize ); } // Run a compute shader to accumulate 
all instances & sub-queries. // Also, for stream-output queries, compare NumPrimitivesWritten to PrimitivesStorageNeeded // Create PSO and root signature m_pParent->EnsureQueryResources(); // throw( _com_error ) // Transition the predication buffer to the UAV state AutoTransition AutoTransition( m_pParent->GetGraphicsCommandList(), m_spPredicationBuffer[(UINT)COMMAND_LIST_TYPE::GRAPHICS].get(), D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, D3D12_RESOURCE_STATE_PREDICATION, D3D12_RESOURCE_STATE_UNORDERED_ACCESS ); UINT NumInstances = m_CurrentInstance; UINT NumSubQueries = GetNumSubQueries(); UINT DataSize12 = GetDataSize12(); // All structures are arrays of 64-bit values assert(0 == (DataSize12 % sizeof(UINT64))); UINT NumCounters = DataSize12 / sizeof(UINT64); { UINT Constants[] = { NumInstances, NumSubQueries * NumCounters }; // This accumulates all instances down to instance 0 m_pParent->FormatBuffer( m_spPredicationBuffer[(UINT)COMMAND_LIST_TYPE::GRAPHICS].get(), m_pParent->m_pAccumulateQueryPSO.get(), 0, (NumInstances * NumSubQueries * DataSize12) / sizeof(UINT), Constants ); // throw( _com_error ) } // For stream-output, accumulate all streams and convert to BOOL (stored in 64-bits) bool IsStreamOut = false; switch (m_Type) { case e_QUERY_STREAMOVERFLOWPREDICATE: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM0: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM1: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM2: case e_QUERY_STREAMOVERFLOWPREDICATE_STREAM3: IsStreamOut = true; break; } if (IsStreamOut) { // UAV barrier to ensure that the calls to FormatBuffer are ordered m_pParent->UAVBarrier(); UINT Constants[] = { NumSubQueries, 0 }; m_pParent->FormatBuffer( m_spPredicationBuffer[(UINT)COMMAND_LIST_TYPE::GRAPHICS].get(), m_pParent->m_pFormatQueryPSO.get(), 0, (NumSubQueries * DataSize12) / sizeof(UINT), Constants ); // throw( _com_error ) } } //---------------------------------------------------------------------------------------------------------------------------------- 
void EventQuery::Initialize() noexcept(false) { } //---------------------------------------------------------------------------------------------------------------------------------- void EventQuery::BeginInternal(bool /*restart*/) noexcept { } //---------------------------------------------------------------------------------------------------------------------------------- void EventQuery::EndInternal() noexcept { } //---------------------------------------------------------------------------------------------------------------------------------- void EventQuery::Suspend() noexcept { } //---------------------------------------------------------------------------------------------------------------------------------- void EventQuery::Resume() noexcept { } //---------------------------------------------------------------------------------------------------------------------------------- void EventQuery::GetDataInternal(_Out_writes_bytes_(DataSize) void* pData, UINT DataSize) noexcept { UNREFERENCED_PARAMETER(DataSize); __analysis_assume(DataSize == sizeof(BOOL)); assert(DataSize == sizeof(BOOL)); reinterpret_cast(pData)[0] = TRUE; } //---------------------------------------------------------------------------------------------------------------------------------- void TimestampDisjointQuery::Initialize() noexcept(false) { } //---------------------------------------------------------------------------------------------------------------------------------- void TimestampDisjointQuery::BeginInternal(bool /*restart*/) noexcept { } //---------------------------------------------------------------------------------------------------------------------------------- void TimestampDisjointQuery::EndInternal() noexcept { } //---------------------------------------------------------------------------------------------------------------------------------- void TimestampDisjointQuery::Suspend() noexcept { } 
//---------------------------------------------------------------------------------------------------------------------------------- void TimestampDisjointQuery::Resume() noexcept { } //---------------------------------------------------------------------------------------------------------------------------------- void TimestampDisjointQuery::GetDataInternal(_Out_writes_bytes_(DataSize) void* pData, UINT DataSize) noexcept { UNREFERENCED_PARAMETER(DataSize); __analysis_assume(DataSize == sizeof(QUERY_DATA_TIMESTAMP_DISJOINT)); assert(DataSize == sizeof(QUERY_DATA_TIMESTAMP_DISJOINT)); QUERY_DATA_TIMESTAMP_DISJOINT* pResult = reinterpret_cast(pData); pResult->Frequency = 0; UINT cLists = 0; for (UINT listType = 0; listType < (UINT)COMMAND_LIST_TYPE::MAX_VALID; listType++) { if (m_CommandListTypeMask & (1 << listType)) { UINT64 Frequency = 0; HRESULT hr = m_pParent->GetCommandQueue((COMMAND_LIST_TYPE)listType)->GetTimestampFrequency(&Frequency); UNREFERENCED_PARAMETER(hr); assert(SUCCEEDED(hr)); // this should only fail if called on the wrong queue type if (Frequency > pResult->Frequency) { pResult->Frequency = Frequency; } ++cLists; } } pResult->Disjoint = cLists > 1; } //---------------------------------------------------------------------------------------------------------------------------------- void ConditionalAutoTransition::Init( ID3D12GraphicsCommandList* pCommandList, ID3D12Resource* pResource, UINT Subresource, D3D12_RESOURCE_STATES Before, D3D12_RESOURCE_STATES After) { assert(m_pCommandList == nullptr && pCommandList != nullptr); assert(m_pResource == nullptr && pResource != nullptr); m_pCommandList = pCommandList; m_pResource = pResource; m_Subresource = Subresource; m_Before = Before; m_After = After; D3D12_RESOURCE_BARRIER BarrierDesc; ZeroMemory(&BarrierDesc, sizeof(BarrierDesc)); BarrierDesc.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; BarrierDesc.Transition.pResource = m_pResource; BarrierDesc.Transition.Subresource = m_Subresource; 
BarrierDesc.Transition.StateBefore = m_Before; BarrierDesc.Transition.StateAfter = m_After; m_pCommandList->ResourceBarrier(1, &BarrierDesc); } //---------------------------------------------------------------------------------------------------------------------------------- ConditionalAutoTransition::~ConditionalAutoTransition() { if (m_pResource) { D3D12_RESOURCE_BARRIER BarrierDesc; ZeroMemory(&BarrierDesc, sizeof(BarrierDesc)); BarrierDesc.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; BarrierDesc.Transition.pResource = m_pResource; BarrierDesc.Transition.Subresource = m_Subresource; BarrierDesc.Transition.StateBefore = m_After; BarrierDesc.Transition.StateAfter = m_Before; m_pCommandList->ResourceBarrier(1, &BarrierDesc); } } //---------------------------------------------------------------------------------------------------------------------------------- // Runs a compute shader on a buffer UAV (R32_UINT) void ImmediateContext::FormatBuffer( ID3D12Resource* pBuffer, ID3D12PipelineState* pPSO, UINT FirstElement, UINT NumElements, const UINT Constants[NUM_UAV_ROOT_SIG_CONSTANTS] ) noexcept(false) { assert(m_InternalUAVRootSig.Created()); CDisablePredication DisablePredication(this); // Reserve a heap slot for the UAV UINT ViewHeapSlot = ReserveSlots(m_ViewHeap, 1); // throw( _com_error ) D3D12_GPU_DESCRIPTOR_HANDLE GPUDescriptor = m_ViewHeap.GPUHandle(ViewHeapSlot); D3D12_CPU_DESCRIPTOR_HANDLE CPUDescriptor = m_ViewHeap.CPUHandle(ViewHeapSlot); PreRender(COMMAND_LIST_TYPE::GRAPHICS); GetGraphicsCommandList()->SetPipelineState(pPSO); m_StatesToReassert |= e_PipelineStateDirty; GetGraphicsCommandList()->SetComputeRootSignature(m_InternalUAVRootSig.GetRootSignature()); m_StatesToReassert |= e_ComputeBindingsDirty; // All bindings must be re-set after RootSig change D3D12_UNORDERED_ACCESS_VIEW_DESC UAVDesc; ZeroMemory(&UAVDesc, sizeof(UAVDesc)); UAVDesc.Format = DXGI_FORMAT_R32_UINT; UAVDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; UAVDesc.Buffer.FirstElement 
= FirstElement; UAVDesc.Buffer.NumElements = NumElements; m_pDevice12->CreateUnorderedAccessView( pBuffer, nullptr, &UAVDesc, CPUDescriptor ); GetGraphicsCommandList()->SetComputeRootDescriptorTable(0, GPUDescriptor); GetGraphicsCommandList()->SetComputeRoot32BitConstants(1, NUM_UAV_ROOT_SIG_CONSTANTS, Constants, 0); GetGraphicsCommandList()->Dispatch(1, 1, 1); PostRender(COMMAND_LIST_TYPE::GRAPHICS); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::QueryBegin(Async* pAsync) { pAsync->Begin(); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::QueryEnd(Async* pAsync) { pAsync->End(); } //---------------------------------------------------------------------------------------------------------------------------------- bool TRANSLATION_API ImmediateContext::QueryGetData(Async* pAsync, void* pData, UINT DataSize, bool DoNotFlush, bool AsyncGetData) { return pAsync->GetData(pData, DataSize, DoNotFlush, AsyncGetData); } //---------------------------------------------------------------------------------------------------------------------------------- void TRANSLATION_API ImmediateContext::SetPredication(Query* pPredicate, BOOL PredicateValue) { m_CurrentState.m_pPredicate = nullptr; SetPredicationInternal(nullptr, FALSE); m_PredicateValue = PredicateValue; if (pPredicate) { pPredicate->UsedInCommandList(COMMAND_LIST_TYPE::GRAPHICS, GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS)); // Copy from the result buffer to the predication buffer // And format the predication buffer if necessary pPredicate->FillPredicationBuffer(); m_CurrentState.m_pPredicate = pPredicate; SetPredicationInternal(pPredicate, PredicateValue); } m_StatesToReassert &= ~(e_PredicateDirty); } 
//---------------------------------------------------------------------------------------------------------------------------------- void ImmediateContext::SetPredicationInternal(Query* pQuery, BOOL PredicateValue) { ID3D12Resource* pPredicationBuffer = pQuery ? pQuery->GetPredicationBuffer() : nullptr; GetGraphicsCommandList()->SetPredication( pPredicationBuffer, 0, PredicateValue ? D3D12_PREDICATION_OP_NOT_EQUAL_ZERO : D3D12_PREDICATION_OP_EQUAL_ZERO ); } }; ================================================ FILE: src/Residency.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #include "pch.h" namespace D3D12TranslationLayer { void Internal::LRUCache::TrimToSyncPointInclusive(INT64 CurrentUsage, INT64 CurrentBudget, std::vector &EvictionList, UINT64 FenceValues[]) { LIST_ENTRY* pResourceEntry = ResidentObjectListHead.Flink; while (pResourceEntry != &ResidentObjectListHead) { ManagedObject* pObject = CONTAINING_RECORD(pResourceEntry, ManagedObject, ListEntry); pResourceEntry = pResourceEntry->Flink; if (CurrentUsage < CurrentBudget) { return; } for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; ++i) { if (pObject->LastUsedFenceValues[i] > FenceValues[i]) { return; } } assert(pObject->ResidencyStatus == ManagedObject::RESIDENCY_STATUS::RESIDENT); if (!pObject->IsPinned()) { EvictionList.push_back(pObject->pUnderlying); Evict(pObject); CurrentUsage -= pObject->Size; } } } void Internal::LRUCache::TrimAgedAllocations(UINT64 FenceValues[], std::vector &EvictionList, UINT64 CurrentTimeStamp, UINT64 MinDelta) { LIST_ENTRY* pResourceEntry = ResidentObjectListHead.Flink; while (pResourceEntry != &ResidentObjectListHead) { ManagedObject* pObject = CONTAINING_RECORD(pResourceEntry, ManagedObject, ListEntry); pResourceEntry = pResourceEntry->Flink; if (CurrentTimeStamp - pObject->LastUsedTimestamp <= MinDelta) // Don't evict things which have been used recently { return; } for (UINT i = 0; i < 
(UINT)COMMAND_LIST_TYPE::MAX_VALID; ++i) { if (pObject->LastUsedFenceValues[i] > FenceValues[i]) { return; } } assert(pObject->ResidencyStatus == ManagedObject::RESIDENCY_STATUS::RESIDENT); if (!pObject->IsPinned()) { EvictionList.push_back(pObject->pUnderlying); Evict(pObject); } } } void Internal::LRUCache::TrimUnusedAllocationsSinceLastNotificationPeriod(UINT64 CurrentPeriodicTrimNotificationIndex, UINT64 FenceValues[], std::vector& EvictionList, UINT64& BytesToEvict) { LIST_ENTRY* pResourceEntry = ResidentObjectListHead.Flink; while (pResourceEntry != &ResidentObjectListHead) { ManagedObject* pObject = CONTAINING_RECORD(pResourceEntry, ManagedObject, ListEntry); pResourceEntry = pResourceEntry->Flink; // List is LRU-sorted, this object is still in use on any command queue fence, so we're done for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; ++i) { if (pObject->LastUsedFenceValues[i] > FenceValues[i]) { return; } } assert(pObject->ResidencyStatus == ManagedObject::RESIDENCY_STATUS::RESIDENT); /* If the object has not been used for at least one full periodic trim cycle and is not pinned, it is eligible for eviction */ if (pObject->LastUsedPeriodicTrimNotificationIndex < (CurrentPeriodicTrimNotificationIndex - 1) && !pObject->IsPinned()) { EvictionList.push_back(pObject->pUnderlying); BytesToEvict += pObject->Size; Evict(pObject); } } } void APIENTRY ResidencyManager::PeriodicTrimNotificationCallback(const D3D12_TRIM_NOTIFICATION* pData) { ResidencyManager* pResidencyManager = reinterpret_cast(pData->pContext); assert(pResidencyManager != nullptr); // A lock must be taken here as the state of the objects will be altered // and this also gives us exclusive access with ProcessPagingWork and other // functions that modify the residency manager state. 
std::lock_guard Lock(pResidencyManager->Mutex); // Always increase the index even if we don't do a trim this time pResidencyManager->PeriodicTrimNotificationIndex++; // Collect the fence values to be used for the call to TrimUnusedAllocationsSinceLastNotificationPeriod // or TrimToSyncPointInclusive below which prevents evicting objects that are still in use on the GPU. UINT64 WaitedFenceValues[(UINT)COMMAND_LIST_TYPE::MAX_VALID]; for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; ++i) { WaitedFenceValues[i] = pResidencyManager->ImmCtx.GetCompletedFenceValue((COMMAND_LIST_TYPE)i); } // Clear the eviction list, collect the elements to be trimmed from // depending on the flags in pData, then evict them if there are any. // and clear the list again afterwards before returning. pResidencyManager->EvictionList.clear(); UINT64 BytesToEvict = 0u; if (pData->Flags & D3D12_TRIM_NOTIFICATION_FLAG_PERIODIC_TRIM) { pResidencyManager->LRU.TrimUnusedAllocationsSinceLastNotificationPeriod( pResidencyManager->PeriodicTrimNotificationIndex, WaitedFenceValues, pResidencyManager->EvictionList, BytesToEvict ); } if (pData->Flags & D3D12_TRIM_NOTIFICATION_FLAG_TRIM_TO_BUDGET) { // Try to free pData->NumBytesToTrim bytes. LARGE_INTEGER CurrentTime = {}; QueryPerformanceCounter(&CurrentTime); DXCoreAdapterMemoryBudget LocalMemory = {}; pResidencyManager->GetCurrentBudget(CurrentTime.QuadPart, &LocalMemory); UINT64 CurrentUsage = (LocalMemory.currentUsage >= BytesToEvict) ? (LocalMemory.currentUsage - BytesToEvict) : 0u; UINT64 BytesToTrim = min(pData->NumBytesToTrim, CurrentUsage); UINT64 TargetBudget = CurrentUsage - BytesToTrim; if (BytesToTrim > 0) { pResidencyManager->LRU.TrimToSyncPointInclusive( CurrentUsage, TargetBudget, pResidencyManager->EvictionList, WaitedFenceValues ); } } // If there are any objects to evict, do so now // and clear the eviction list afterwards. 
if (!pResidencyManager->EvictionList.empty()) { [[maybe_unused]] HRESULT hrEvict = pResidencyManager->Device->Evict((UINT)pResidencyManager->EvictionList.size(), pResidencyManager->EvictionList.data()); assert(SUCCEEDED(hrEvict)); pResidencyManager->EvictionList.clear(); } } ResidencyManager::~ResidencyManager() { if (PeriodicTrimCallbackCookie != c_PeriodicTrimCallbackCookie_Unregistered) { [[maybe_unused]] HRESULT hr = Device15->UnregisterTrimNotificationCallback(PeriodicTrimCallbackCookie); assert(SUCCEEDED(hr)); } } HRESULT ResidencyManager::Initialize(UINT DeviceNodeIndex, IDXCoreAdapter *ParentAdapterDXCore, IDXGIAdapter3 *ParentAdapterDXGI) { NodeIndex = DeviceNodeIndex; AdapterDXCore = ParentAdapterDXCore; AdapterDXGI = ParentAdapterDXGI; if (FAILED(ImmCtx.m_pDevice12->QueryInterface(&Device))) { return E_NOINTERFACE; } LARGE_INTEGER Frequency; QueryPerformanceFrequency(&Frequency); // Calculate how many QPC ticks are equivalent to the given time in seconds MinEvictionGracePeriodTicks = UINT64(Frequency.QuadPart * cMinEvictionGracePeriod); MaxEvictionGracePeriodTicks = UINT64(Frequency.QuadPart * cMaxEvictionGracePeriod); BudgetQueryPeriodTicks = UINT64(Frequency.QuadPart * cBudgetQueryPeriod); HRESULT hr = S_OK; hr = AsyncThreadFence.Initialize(Device); // Register for Trim Notification Callback if supported by the OS // or ignore the failure and just not do periodic trims on OS that don't support it. if (SUCCEEDED(Device->QueryInterface(&Device15))) { D3D12_REGISTER_TRIM_NOTIFICATION registerArgs = { &PeriodicTrimNotificationCallback, this, 0 }; if (SUCCEEDED(Device15->RegisterTrimNotificationCallback(®isterArgs))) { PeriodicTrimCallbackCookie = registerArgs.CallbackCookie; } } return hr; } HRESULT ResidencyManager::ProcessPagingWork(UINT CommandListIndex, ResidencySet *pMasterSet) { // the size of all the objects which will need to be made resident in order to execute this set. 
UINT64 SizeToMakeResident = 0; LARGE_INTEGER CurrentTime; QueryPerformanceCounter(&CurrentTime); HRESULT hr = S_OK; { // A lock must be taken here as the state of the objects will be altered std::lock_guard Lock(Mutex); MakeResidentList.reserve(pMasterSet->Set.size()); EvictionList.reserve(LRU.NumResidentObjects); // Mark the objects used by this command list to be made resident for (auto pObject : pMasterSet->Set) { // If it's evicted we need to make it resident again if (pObject->ResidencyStatus == ManagedObject::RESIDENCY_STATUS::EVICTED) { MakeResidentList.push_back({ pObject }); LRU.MakeResident(pObject); SizeToMakeResident += pObject->Size; } // Update the last sync point that this was used on // Note: This can be used for app command queues as well, but in that case, they'll // be pinned rather than relying on this implicit sync point tracking. if (CommandListIndex < (UINT)COMMAND_LIST_TYPE::MAX_VALID) { pObject->LastUsedFenceValues[CommandListIndex] = ImmCtx.GetCommandListID((COMMAND_LIST_TYPE)CommandListIndex); } pObject->LastUsedTimestamp = CurrentTime.QuadPart; pObject->LastUsedPeriodicTrimNotificationIndex = PeriodicTrimNotificationIndex; LRU.ObjectReferenced(pObject); } DXCoreAdapterMemoryBudget LocalMemory; ZeroMemory(&LocalMemory, sizeof(LocalMemory)); GetCurrentBudget(CurrentTime.QuadPart, &LocalMemory); UINT64 EvictionGracePeriod = GetCurrentEvictionGracePeriod(&LocalMemory); UINT64 LastSubmittedFenceValues[(UINT)COMMAND_LIST_TYPE::MAX_VALID]; UINT64 WaitedFenceValues[(UINT)COMMAND_LIST_TYPE::MAX_VALID]; for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; ++i) { LastSubmittedFenceValues[i] = ImmCtx.GetCommandListID((COMMAND_LIST_TYPE)i) - 1; WaitedFenceValues[i] = ImmCtx.GetCompletedFenceValue((COMMAND_LIST_TYPE)i); } LRU.TrimAgedAllocations(WaitedFenceValues, EvictionList, CurrentTime.QuadPart, EvictionGracePeriod); if (!EvictionList.empty()) { [[maybe_unused]] HRESULT hrEvict = Device->Evict((UINT)EvictionList.size(), EvictionList.data()); 
assert(SUCCEEDED(hrEvict)); EvictionList.clear(); } if (!MakeResidentList.empty()) { UINT32 ObjectsMadeResident = 0; UINT32 MakeResidentIndex = 0; while (true) { INT64 TotalUsage = LocalMemory.currentUsage; INT64 TotalBudget = LocalMemory.budget; INT64 AvailableSpace = TotalBudget - TotalUsage; UINT64 BatchSize = 0; UINT32 NumObjectsInBatch = 0; UINT32 BatchStart = MakeResidentIndex; if (AvailableSpace > 0) { assert(MakeResidentList.size() < MAXUINT32); for (UINT32 i = MakeResidentIndex; i < static_cast(MakeResidentList.size()); i++) { // If we try to make this object resident, will we go over budget? if (BatchSize + MakeResidentList[i].pManagedObject->Size > UINT64(AvailableSpace)) { // Next time we will start here MakeResidentIndex = i; break; } else { BatchSize += MakeResidentList[i].pManagedObject->Size; NumObjectsInBatch++; ObjectsMadeResident++; MakeResidentList[i].pUnderlying = MakeResidentList[i].pManagedObject->pUnderlying; } } hr = Device->EnqueueMakeResident(D3D12_RESIDENCY_FLAG_NONE, NumObjectsInBatch, &MakeResidentList[BatchStart].pUnderlying, AsyncThreadFence.pFence, AsyncThreadFence.FenceValue + 1); if (SUCCEEDED(hr)) { AsyncThreadFence.Increment(); SizeToMakeResident -= BatchSize; } } if (FAILED(hr) || ObjectsMadeResident != MakeResidentList.size()) { ManagedObject *pResidentHead = LRU.GetResidentListHead(); while (pResidentHead && pResidentHead->IsPinned()) { pResidentHead = CONTAINING_RECORD(pResidentHead->ListEntry.Flink, ManagedObject, ListEntry); } // If there is nothing to trim OR the only objects 'Resident' are the ones about to be used by this execute. 
bool ForceResidency = pResidentHead == nullptr; for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID && !ForceResidency; ++i) { ForceResidency = pResidentHead->LastUsedFenceValues[i] > LastSubmittedFenceValues[i]; } if (ForceResidency) { // Make resident the rest of the objects as there is nothing left to trim UINT32 NumObjects = (UINT32)MakeResidentList.size() - ObjectsMadeResident; // Gather up the remaining underlying objects for (size_t i = MakeResidentIndex; i < MakeResidentList.size(); i++) { MakeResidentList[i].pUnderlying = MakeResidentList[i].pManagedObject->pUnderlying; } hr = Device->EnqueueMakeResident(D3D12_RESIDENCY_FLAG_NONE, NumObjects, &MakeResidentList[MakeResidentIndex].pUnderlying, AsyncThreadFence.pFence, AsyncThreadFence.FenceValue + 1); if (SUCCEEDED(hr)) { AsyncThreadFence.Increment(); } if (FAILED(hr)) { // TODO: What should we do if this fails? This is a catastrophic failure in which the app is trying to use more memory // in 1 command list than can possibly be made resident by the system. assert(SUCCEEDED(hr)); } break; } // Wait until the GPU is done UINT64 *FenceValuesToWaitFor = pResidentHead ? 
pResidentHead->LastUsedFenceValues : LastSubmittedFenceValues; WaitForSyncPoint(FenceValuesToWaitFor); std::copy(FenceValuesToWaitFor, FenceValuesToWaitFor + (UINT)COMMAND_LIST_TYPE::MAX_VALID, WaitedFenceValues); EvictionList.clear(); LRU.TrimToSyncPointInclusive(TotalUsage + INT64(SizeToMakeResident), TotalBudget, EvictionList, WaitedFenceValues); [[maybe_unused]] HRESULT hrEvict = Device->Evict((UINT)EvictionList.size(), EvictionList.data()); assert(SUCCEEDED(hrEvict)); } else { // We made everything resident, mission accomplished break; } } } MakeResidentList.clear(); EvictionList.clear(); return hr; } } /* Queries the DXCore adapter's AdapterMemoryBudget state for the given node and segment group and writes it to *InfoOut. A failed QueryState is only detected by the assert, so callers should pre-initialize *InfoOut. */ static void GetDXCoreBudget(IDXCoreAdapter *AdapterDXCore, UINT NodeIndex, DXCoreAdapterMemoryBudget *InfoOut, DXCoreSegmentGroup Segment) { DXCoreAdapterMemoryBudgetNodeSegmentGroup InputParams = {}; InputParams.nodeIndex = NodeIndex; InputParams.segmentGroup = Segment; [[maybe_unused]] HRESULT hr = AdapterDXCore->QueryState(DXCoreAdapterState::AdapterMemoryBudget, &InputParams, InfoOut); assert(SUCCEEDED(hr)); } /* Queries DXGI video memory info for the given node and segment group and writes it to *InfoOut. A failed QueryVideoMemoryInfo is only detected by the assert, so callers should pre-initialize *InfoOut. */ static void GetDXGIBudget(IDXGIAdapter3 *AdapterDXGI, UINT NodeIndex, DXGI_QUERY_VIDEO_MEMORY_INFO *InfoOut, DXGI_MEMORY_SEGMENT_GROUP Segment) { [[maybe_unused]] HRESULT hr = AdapterDXGI->QueryVideoMemoryInfo(NodeIndex, Segment, InfoOut); assert(SUCCEEDED(hr)); } /* Copies the cached budget into *InfoOut. When at least BudgetQueryPeriodTicks have elapsed since LastBudgetTimestamp, the cache is first refreshed with the sum of the local and non-local segment groups (usage and budget), using DXCore when AdapterDXCore is non-null and DXGI otherwise. Timestamp is in the same units as LastBudgetTimestamp. */ void ResidencyManager::GetCurrentBudget(UINT64 Timestamp, DXCoreAdapterMemoryBudget* InfoOut) { if (Timestamp - LastBudgetTimestamp >= BudgetQueryPeriodTicks) { LastBudgetTimestamp = Timestamp; if (AdapterDXCore) { /* Value-initialized so that a failed query (only asserted) cannot feed indeterminate values into CachedBudget in release builds. */ DXCoreAdapterMemoryBudget Local = {}, Nonlocal = {}; GetDXCoreBudget(AdapterDXCore, NodeIndex, &Local, DXCoreSegmentGroup::Local); GetDXCoreBudget(AdapterDXCore, NodeIndex, &Nonlocal, DXCoreSegmentGroup::NonLocal); CachedBudget.currentUsage = Local.currentUsage + Nonlocal.currentUsage; CachedBudget.budget = Local.budget + Nonlocal.budget; } else { /* Value-initialized for the same reason as the DXCore path above. */ DXGI_QUERY_VIDEO_MEMORY_INFO Local = {}, Nonlocal = {}; GetDXGIBudget(AdapterDXGI, NodeIndex, &Local, DXGI_MEMORY_SEGMENT_GROUP_LOCAL);
GetDXGIBudget(AdapterDXGI, NodeIndex, &Nonlocal, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL); CachedBudget.currentUsage = Local.CurrentUsage + Nonlocal.CurrentUsage; CachedBudget.budget = Local.Budget + Nonlocal.Budget; } } *InfoOut = CachedBudget; } /* Calls ImmCtx.WaitForFenceValue once per command list type below MAX_VALID, passing FenceValues[type]; FenceValues must therefore hold at least MAX_VALID entries. */ void ResidencyManager::WaitForSyncPoint(UINT64 FenceValues[]) { for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; ++i) { ImmCtx.WaitForFenceValue((COMMAND_LIST_TYPE)i, FenceValues[i]); } } } ================================================ FILE: src/Resource.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #include "pch.h" namespace D3D12TranslationLayer { //================================================================================================================================== // AppResourceDesc //================================================================================================================================== //---------------------------------------------------------------------------------------------------------------------------------- /* Builds an AppResourceDesc from a D3D12 resource desc. DepthOrArraySize is interpreted as depth for TEXTURE3D (array size 1) and as array size otherwise (depth 1); the subresource count is MipLevels * arraySize * non-opaque plane count. Access and BindFlags are cast to the translation layer's enum types. */ AppResourceDesc::AppResourceDesc(const D3D12_RESOURCE_DESC &desc12, D3D12TranslationLayer::RESOURCE_USAGE Usage, DWORD Access, DWORD BindFlags) { UINT16 depth = desc12.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE3D ? 1 : desc12.DepthOrArraySize; UINT16 arraySize = desc12.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ?
1 : desc12.DepthOrArraySize; UINT8 nonOpaquePlaneCount = (UINT8)CD3D11FormatHelper::NonOpaquePlaneCount(desc12.Format); UINT numSubresources = desc12.MipLevels * arraySize * nonOpaquePlaneCount; /* Delegate to the member-wise constructor and assign the result to this object; the first argument (mips * array slices) is presumably the per-plane subresource count - confirm against that constructor. */ *this = AppResourceDesc( desc12.MipLevels * arraySize, nonOpaquePlaneCount, numSubresources, (UINT8)desc12.MipLevels, arraySize, depth, (UINT)desc12.Width, (UINT)desc12.Height, desc12.Format, desc12.SampleDesc.Count, desc12.SampleDesc.Quality, Usage, (D3D12TranslationLayer::RESOURCE_CPU_ACCESS)Access, (D3D12TranslationLayer::RESOURCE_BIND_FLAGS)BindFlags, desc12.Dimension); } //================================================================================================================================== // Resource //================================================================================================================================== //---------------------------------------------------------------------------------------------------------------------------------- /* Finishes creation of an allocated Resource. Decides whether it owns its underlying D3D12 object (default or custom heap, owned readback heap, lockable shared buffer, or dynamic non-buffer); owned resources go through CreateUnderlying, all others acquire a suballocation from the parent context and have their subresource offsets rebased onto it. Afterwards state tracking is reset via UnderlyingResourceChanged and the object is marked valid. May throw (noexcept(false)). */ void TRANSLATION_API Resource::Create(ResourceAllocationContext threadingContext) noexcept(false) { SetWaitForCompletionRequired(true); bool& ownsResource = m_Identity->m_bOwnsUnderlyingResource; ownsResource = (m_creationArgs.m_heapDesc.Properties.Type == D3D12_HEAP_TYPE_DEFAULT || m_creationArgs.m_heapDesc.Properties.Type == D3D12_HEAP_TYPE_CUSTOM || OwnsReadbackHeap() || IsLockableSharedBuffer() || (AppDesc()->Usage() == RESOURCE_USAGE_DYNAMIC && (Parent()->ResourceDimension12() != D3D12_RESOURCE_DIMENSION_BUFFER))); // Create/retrieve the resource if (ownsResource) { CreateUnderlying(threadingContext); // throw( _com_error, bad_alloc ) } else { // Stream-output only supported in default heaps assert(!m_creationArgs.m_bBoundForStreamOut); if (IsDecoderCompressedBuffer()) { m_Identity->m_MaxOutstandingResources = MAX_OUTSTANDING_DECODER_COMPRESSED_BUFFERS; } m_Identity->m_suballocation = m_pParent->AcquireSuballocatedHeapForResource(this, threadingContext); for (auto& placement :
m_SubresourcePlacement) { placement.Offset += m_Identity->GetSuballocatedOffset(); } // Note: This clear routine simply releases the container's references on the preallocated memory, // and destructs any objects contained within. It leaves the memory allocated. // // In the future, we may want to revisit the allocation routines below with understanding of the // "ownsResource" computation above, either recalculating it to compute/initialize the preallocated arrays, // or precalculate it prior to allocating and initializing the Resource class. m_DynamicTexturePlaneData.clear(); m_spCurrentCpuHeaps.clear(); } // Update the state UnderlyingResourceChanged(); // throw( _com_error ) m_isValid = true; } //---------------------------------------------------------------------------------------------------------------------------------- /* Returns 2 when CD3D11FormatHelper::FamilySupportsStencil reports true for the app format, otherwise 1. */ inline UINT8 GetSubresourceMultiplier(ResourceCreationArgs const& createArgs) noexcept { return CD3D11FormatHelper::FamilySupportsStencil(createArgs.m_appDesc.Format()) ? 2 : 1; } /* Returns the app-visible subresource count multiplied by GetSubresourceMultiplier. */ inline UINT GetTotalSubresources(ResourceCreationArgs const& createArgs) noexcept { return createArgs.m_appDesc.Subresources() * GetSubresourceMultiplier(createArgs); } /* Returns the subresource count passed to TransitionableResourceBase: 1 for staging usage or a row-major texture layout, otherwise the total subresource count. */ inline UINT GetSubresourcesForTransitioning(ResourceCreationArgs const& createArgs) noexcept { return (createArgs.m_appDesc.Usage() == RESOURCE_USAGE_STAGING || createArgs.ApiTextureLayout12() == D3D12_TEXTURE_LAYOUT_ROW_MAJOR) ?
1u : GetTotalSubresources(createArgs); } /* Returns the total subresource count when usage is staging or dynamic and the subresource multiplier exceeds 1; otherwise 0. Asserts that format emulation is YV12 or None. */ inline UINT GetSubresourcesForFormatEmulationStagingAllocation(ResourceCreationArgs const& createArgs) noexcept { assert(createArgs.m_FormatEmulation == FormatEmulation::YV12 || createArgs.m_FormatEmulation == FormatEmulation::None); if ( createArgs.m_appDesc.Usage() == RESOURCE_USAGE_STAGING || createArgs.m_appDesc.Usage() == static_cast(D3D11_USAGE_DYNAMIC)) { if (GetSubresourceMultiplier(createArgs) > 1) { return GetTotalSubresources(createArgs); } } return 0u; } /* Returns the total subresource count when usage is staging or dynamic and either the subresource multiplier exceeds 1 or a format emulation is active; otherwise 0. */ inline UINT GetSubresourcesForFormatEmulationStagingData(ResourceCreationArgs const& createArgs) noexcept { return ( ( createArgs.m_appDesc.Usage() == RESOURCE_USAGE_STAGING || createArgs.m_appDesc.Usage() == static_cast(D3D11_USAGE_DYNAMIC)) && ( GetSubresourceMultiplier(createArgs) > 1 || createArgs.m_FormatEmulation != FormatEmulation::None)) ? GetTotalSubresources(createArgs) : 0u; } /* Returns one entry per mip level for resources flagged D3D11_RESOURCE_MISC_TILED, otherwise 0. */ inline UINT GetSubresourcesForTilingData(ResourceCreationArgs const& createArgs) noexcept { // Preallocate assuming no packed mips. return (createArgs.m_flags11.MiscFlags & D3D11_RESOURCE_MISC_TILED) ? createArgs.m_appDesc.MipLevels() : 0u; } /* For dynamic usage returns Subresources() / NonOpaquePlaneCount(); otherwise 0. */ inline UINT GetSubresourcesForDynamicTexturePlaneData(ResourceCreationArgs const& createArgs) noexcept { return (createArgs.m_appDesc.Usage() == static_cast(D3D11_USAGE_DYNAMIC)) ? createArgs.m_appDesc.Subresources() / createArgs.m_appDesc.NonOpaquePlaneCount() : 0u; } /* Same count as GetSubresourcesForDynamicTexturePlaneData, but only when any CPU access flag is set; otherwise 0. */ inline UINT GetSubresourcesForCpuHeaps(ResourceCreationArgs const& createArgs) noexcept { return createArgs.m_appDesc.CPUAccessFlags() ? GetSubresourcesForDynamicTexturePlaneData(createArgs) : 0u; } /* True when the resource is a buffer or carries D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS, and its usage is default or immutable. */ inline bool IsSimultaneousAccess(ResourceCreationArgs const& createArgs) noexcept { // TODO: Some resource types add in simultaneous access during CreateUnderlying // We should refactor so that the desc doesn't change after construction...
return ((createArgs.m_desc12.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS) != D3D12_RESOURCE_FLAG_NONE || createArgs.m_desc12.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) && (createArgs.m_appDesc.Usage() == static_cast(D3D11_USAGE_DEFAULT) || createArgs.m_appDesc.Usage() == static_cast(D3D11_USAGE_IMMUTABLE)); } //---------------------------------------------------------------------------------------------------------------------------------- /* Returns the byte size of the single allocation that holds a Resource object followed by its preallocated arrays. Each term corresponds to one member that receives pPreallocatedMemory in the Resource constructor; AllocateResource asserts that the constructor consumed exactly this many bytes. */ size_t Resource::CalcPreallocationSize(ResourceCreationArgs const& createArgs) { auto SubresourceCount = GetTotalSubresources(createArgs); return sizeof(Resource) + TransitionableResourceBase::CalcPreallocationSize(GetSubresourcesForTransitioning(createArgs)) + CResourceBindings::CalcPreallocationSize(SubresourceCount) + sizeof(unique_comptr) * GetSubresourcesForCpuHeaps(createArgs) + sizeof(D3D12_PLACED_SUBRESOURCE_FOOTPRINT) * SubresourceCount + sizeof(SEmulatedFormatSubresourceStagingAllocation) * GetSubresourcesForFormatEmulationStagingAllocation(createArgs) + sizeof(SEmulatedFormatSubresourceStagingData) * GetSubresourcesForFormatEmulationStagingData(createArgs) + sizeof(UINT64) * GetSubresourcesForCpuHeaps(createArgs) + sizeof(D3D12_SUBRESOURCE_TILING) * GetSubresourcesForTilingData(createArgs) + sizeof(DynamicTexturePlaneData) * GetSubresourcesForDynamicTexturePlaneData(createArgs); } //---------------------------------------------------------------------------------------------------------------------------------- /* Constructs a Resource whose variable-length arrays live in caller-provided storage. pPreallocatedMemory is taken by reference and handed to every array-owning base and member; presumably each one advances it past the bytes it claims - confirm against the container types. May throw (noexcept(false)). */ Resource::Resource(ImmediateContext* pDevice, ResourceCreationArgs& createArgs, void*& pPreallocatedMemory) noexcept(false) : DeviceChild(pDevice) , TransitionableResourceBase(GetSubresourcesForTransitioning(createArgs), createArgs.m_bTriggerDeferredWaits, pPreallocatedMemory) , m_SubresourceMultiplier(GetSubresourceMultiplier(createArgs)) , m_creationArgs(createArgs) , m_Identity(AllocateResourceIdentity(NumSubresources(), IsSimultaneousAccess(createArgs))) // throw( bad_alloc ) ,
/* Continuation of the Resource constructor's initializer list: the element counts passed here are the same helper results that CalcPreallocationSize multiplies by the element sizes. */ m_currentBindings(NumSubresources(), AppDesc()->BindFlags(), pPreallocatedMemory) , m_MinLOD(0.0f) , m_SRVUniqueness(0) , m_AllUniqueness(0) , m_OffsetToStreamOutputSuffix(0) , m_spCurrentCpuHeaps(GetSubresourcesForCpuHeaps(createArgs), pPreallocatedMemory) , m_SubresourcePlacement(NumSubresources(), pPreallocatedMemory) , m_FormatEmulationStagingAllocation(GetSubresourcesForFormatEmulationStagingAllocation(createArgs), pPreallocatedMemory) , m_FormatEmulationStagingData(GetSubresourcesForFormatEmulationStagingData(createArgs), pPreallocatedMemory) , m_LastCommandListID(GetSubresourcesForCpuHeaps(createArgs), pPreallocatedMemory) , m_TiledResource(GetSubresourcesForTilingData(createArgs), pPreallocatedMemory) , m_DynamicTexturePlaneData(GetSubresourcesForDynamicTexturePlaneData(createArgs), pPreallocatedMemory) { /* Start from the app-requested usage; it is overridden below for emulated row-major textures. */ m_effectiveUsage = AppDesc()->Usage(); // Default row-major textures are unsupported by D3D12 (except for cross-adapter), therefore // they are emulated using buffers and should typically be handled similarly to staging textures.
if (m_effectiveUsage == static_cast(D3D11_USAGE_DEFAULT) && Parent()->ResourceDimension12() != D3D12_RESOURCE_DIMENSION_BUFFER && Parent()->ApiTextureLayout12() == D3D12_TEXTURE_LAYOUT_ROW_MAJOR) { m_effectiveUsage = RESOURCE_USAGE_STAGING; } InitializeSubresourceDescs(); // throw( _com_error ) } //---------------------------------------------------------------------------------------------------------------------------------- /* Allocates one raw block of CalcPreallocationSize(createArgs) bytes, placement-constructs the Resource at its start and hands the bytes after the object to the constructor as preallocated array storage. spMemory frees the block if construction throws; after success ownership moves to the returned pointer. The trailing assert checks that the constructor consumed exactly ObjectSize bytes. */ unique_comptr Resource::AllocateResource(ImmediateContext* pDevice, ResourceCreationArgs& createArgs) { struct VoidDeleter { void operator()(void* p) { operator delete(p); } }; size_t ObjectSize = CalcPreallocationSize(createArgs); std::unique_ptr spMemory(operator new(ObjectSize)); void* pPreallocatedMemory = reinterpret_cast(spMemory.get()) + 1; unique_comptr spResource(new (spMemory.get()) Resource(pDevice, createArgs, pPreallocatedMemory)); spMemory.release(); // This can fire if CalcPreallocationSize doesn't account for all preallocated arrays, or if the void*& decays to void* assert(reinterpret_cast(pPreallocatedMemory) == reinterpret_cast(spResource.get()) + ObjectSize); return std::move(spResource); } //---------------------------------------------------------------------------------------------------------------------------------- /* Allocates a Resource via AllocateResource and then runs Create(threadingContext) on it; exceptions from either step propagate to the caller. */ unique_comptr Resource::CreateResource(ImmediateContext* pDevice, ResourceCreationArgs& createArgs, ResourceAllocationContext threadingContext) { auto spResource = AllocateResource(pDevice, createArgs); spResource->Create(threadingContext); return std::move(spResource); } //---------------------------------------------------------------------------------------------------------------------------------- /* Wraps an existing object (pResource) in a newly allocated Resource: marks the identity as shared, binds pResource either as the underlying resource or, if that QueryInterface fails, as a heap added to the tile pool, and seeds the tracked state from currentState when simultaneous access is not supported. Throws E_INVALIDARG through ThrowFailure when neither interface is available. */ unique_comptr Resource::OpenResource( ImmediateContext* pParent, ResourceCreationArgs& creationArgs, _In_ IUnknown *pResource, DeferredDestructionType deferredDestructionType, _In_ D3D12_RESOURCE_STATES currentState) { unique_comptr spUnderlyingResource = AllocateResource(pParent, creationArgs); {
/* Continuation of Resource::OpenResource: everything inside this scope may throw, so m_isValid is only set after it. */ D3D12TranslationLayer::Resource::SResourceIdentity* pIdentity = spUnderlyingResource->GetIdentity(); pIdentity->m_bSharedResource = true; spUnderlyingResource->SetWaitForCompletionRequired(deferredDestructionType == D3D12TranslationLayer::DeferredDestructionType::Completion); // Resource/heap has one ref, owned by DXGIOn12 through the KM resource handle // Detach cannot be invoked until no more failures can happen, or else the runtime may early release another shared resource // These QIs will take additional refs, so DXGIOn12's ref must be released when the KM resource handle is detached if (FAILED(pResource->QueryInterface(&pIdentity->m_spUnderlyingResource))) { // Tile pool D3D12TranslationLayer::unique_comptr spHeap; if (FAILED(pResource->QueryInterface(&spHeap))) { ThrowFailure(E_INVALIDARG); } spUnderlyingResource->AddHeapToTilePool(std::move(spHeap)); } spUnderlyingResource->UnderlyingResourceChanged(); // throw( _com_error ) if (!spUnderlyingResource->m_Identity->m_currentState.SupportsSimultaneousAccess()) { CCurrentResourceState::ExclusiveState ExclusiveState = { 0, currentState, COMMAND_LIST_TYPE::GRAPHICS }; spUnderlyingResource->m_Identity->m_currentState.SetExclusiveResourceState(ExclusiveState); } if (spUnderlyingResource->m_creationArgs.m_flags11.MiscFlags & D3D11_RESOURCE_MISC_TILED) { spUnderlyingResource->InitializeTilingData(); } } // No more exceptions spUnderlyingResource->m_isValid = true; return std::move(spUnderlyingResource); } //---------------------------------------------------------------------------------------------------------------------------------- /* Queries the D3D12 device for this resource's tiling layout and caches it in m_TiledResource: total tile count, tile count of the packed mips, number of standard mips, and the per-subresource tiling array. Uses two GetResourceTiling calls; the second one fills the per-subresource array for the standard mips. */ void Resource::InitializeTilingData() noexcept { // Query tiling data D3D12_PACKED_MIP_INFO PackedMipDesc; m_pParent->m_pDevice12->GetResourceTiling( m_Identity->GetResource(), &m_TiledResource.m_NumTilesForResource, &PackedMipDesc, nullptr, nullptr, 0, nullptr); m_TiledResource.m_NumTilesForPackedMips = PackedMipDesc.NumTilesForPackedMips; if (Parent()->ResourceDimension12() ==
D3D12_RESOURCE_DIMENSION_BUFFER) { /* Buffers are treated as having exactly one standard mip. */ PackedMipDesc.NumStandardMips = 1; } // Validate some assumptions: // We should only need to store either number of packed mips or number of standard mips assert(PackedMipDesc.NumPackedMips == AppDesc()->MipLevels() - PackedMipDesc.NumStandardMips); // Resources with packed mips cannot have array slices assert(PackedMipDesc.NumPackedMips == 0 || AppDesc()->ArraySize() == 1); // 11on12 is not robust against planar tiled assert(AppDesc()->NonOpaquePlaneCount() == 1 && m_SubresourceMultiplier == 1); m_TiledResource.m_NumStandardMips = PackedMipDesc.NumStandardMips; m_pParent->m_pDevice12->GetResourceTiling( m_Identity->GetResource(), nullptr, nullptr, nullptr, &m_TiledResource.m_NumStandardMips, 0, m_TiledResource.m_SubresourceTiling.begin()); } //---------------------------------------------------------------------------------------------------------------------------------- /* Destructor. Drops the identity early when initialization never completed, returns pooled buffers to the parent context, propagates this resource's last-used command list IDs to its tile pool (if any), and hands owned resources or tile-pool heaps to the deferred deletion queues. */ Resource::~Resource() noexcept { if (!m_isValid) { m_Identity.reset(nullptr); } m_pParent->ReturnAllBuffersToPool(*this); // If this resource was ever valid, ensure that it either still has its owned heap/resource, or that it returned its borrowed resource assert(m_Identity.get() == nullptr || // The identity is deleted early in the case of failed initialization m_Identity->m_bOwnsUnderlyingResource == (m_Identity->m_spUnderlyingResource.get() != nullptr) || !m_TilePool.m_Allocations.empty()); if (m_TiledResource.m_pTilePool != nullptr) { // Assuming that destruction ordering between tiled resource and tile pool is correct, // we need to ensure that the tile pool's destruction is deferred at least as long as the resource.
for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++) { m_TiledResource.m_pTilePool->MarkUsedInCommandListIfNewer((COMMAND_LIST_TYPE)i, m_LastUsedCommandListID[i]); } } if (m_Identity) { if (m_Identity->m_bOwnsUnderlyingResource) { if (m_Identity->GetOwnedResource()) { m_pParent->AddResourceToDeferredDeletionQueue(GetIdentity()->GetOwnedResource(), std::move(m_Identity->m_pResidencyHandle), m_LastUsedCommandListID, m_bWaitForCompletionRequired, std::move(m_ResourceDeferredWaits)); } else { for (auto& allocation : m_TilePool.m_Allocations) { AddToDeferredDeletionQueue(allocation.m_spUnderlyingBufferHeap); AddToDeferredDeletionQueue(allocation.m_spUnderlyingTextureHeap); } } } } m_isValid = false; // Tag deleted resources for easy inspection in a debugger } /* Registers the underlying resource with the parent's residency manager, but only for non-shared resources that own a non-null underlying resource. The tracked size comes from GetResourceAllocationInfo on the creation desc; bIsResident is forwarded as the initial residency state. */ void Resource::AddToResidencyManager(bool bIsResident) { if (m_Identity && !m_Identity->m_bSharedResource && m_Identity->m_bOwnsUnderlyingResource && m_Identity->GetOwnedResource() != nullptr) { auto &residencyManager = m_pParent->GetResidencyManager(); m_Identity->m_pResidencyHandle = std::unique_ptr(new ResidencyManagedObjectWrapper(residencyManager)); D3D12_RESOURCE_DESC resourceDesc12 = m_creationArgs.m_desc12; D3D12_RESOURCE_ALLOCATION_INFO allocInfo = m_pParent->m_pDevice12->GetResourceAllocationInfo(m_pParent->GetNodeMask(), 1, &resourceDesc12); m_Identity->m_pResidencyHandle->Initialize(m_Identity->GetResource(), allocInfo.SizeInBytes, bIsResident); } } /* Minimal ID3D12SwapChainAssistant implementation: reference counted, exposes a locally unique LUID allocated at construction (throws std::bad_alloc if that fails), and returns E_NOTIMPL from every other interface method. The destructor is private, so instances are destroyed only through Release. NOTE(review): the reference count starts at 0, so the first owner must AddRef - confirm unique_comptr::reset does. */ class SwapChainAssistant : public ID3D12SwapChainAssistant { private: ~SwapChainAssistant() = default; public: SwapChainAssistant() { if (!AllocateLocallyUniqueId(&m_LUID)) throw std::bad_alloc(); } STDMETHOD_(ULONG, AddRef)() { return InterlockedIncrement(&m_RefCount); } STDMETHOD_(ULONG, Release)() { ULONG ret = InterlockedDecrement(&m_RefCount); if (ret == 0) delete this; return ret; } /* Supports ID3D12SwapChainAssistant and IUnknown. Follows the COM contract: E_POINTER for a null out-pointer, and *ppv is set to nullptr when the interface is not supported. */ STDMETHOD(QueryInterface)(REFIID riid, void** ppv) { if (ppv == nullptr) { return E_POINTER; } if (InlineIsEqualGUID(riid, __uuidof(ID3D12SwapChainAssistant)) || InlineIsEqualUnknown(riid)) { AddRef();
*ppv = this; return S_OK; } /* Unsupported interface: COM requires the out-pointer to be nulled. */ *ppv = nullptr; return E_NOINTERFACE; } STDMETHOD_(LUID, GetLUID)() { return m_LUID; } STDMETHOD(GetSwapChainObject)(REFIID, void**) { return E_NOTIMPL; } STDMETHOD(GetCurrentResourceAndCommandQueue)(REFIID, void**, REFIID, void**) { return E_NOTIMPL; } STDMETHOD(InsertImplicitSync)() { return E_NOTIMPL; } volatile ULONG m_RefCount = 0; LUID m_LUID; }; //---------------------------------------------------------------------------------------------------------------------------------- /* Creates the D3D12 object backing a Resource that owns its underlying resource. First normalizes the creation arguments in place (heap type for dynamic textures and lockable shared buffers, heap properties, render-target flag for MSAA resources without RTV/DSV, NV12 in place of 420_OPAQUE), then creates either a tile pool heap or a resource. May throw (noexcept(false)). */ void Resource::CreateUnderlying(ResourceAllocationContext threadingContext) noexcept(false) { assert(m_Identity->m_bOwnsUnderlyingResource); assert(0 == m_OffsetToStreamOutputSuffix); m_OffsetToStreamOutputSuffix = m_creationArgs.m_OffsetToStreamOutputSuffix; D3D12_RESOURCE_DESC& Desc12 = m_creationArgs.m_desc12; D3D12_HEAP_DESC& HeapDesc = m_creationArgs.m_heapDesc; D3D11_RESOURCE_FLAGS& Flags11 = m_creationArgs.m_flags11; if (AppDesc()->Usage() == RESOURCE_USAGE_DYNAMIC && (Desc12.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER || IsLockableSharedBuffer())) { HeapDesc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; } if (HeapDesc.Properties.Type != D3D12_HEAP_TYPE_CUSTOM) { HeapDesc.Properties = m_pParent->GetHeapProperties(HeapDesc.Properties.Type); } // D3D11 allowed SRV-only MSAA resources, but D3D12 requires RTV/DSV if (Desc12.SampleDesc.Count > 1 && (Desc12.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) == 0) { Desc12.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; } if (Desc12.Format == DXGI_FORMAT_420_OPAQUE) // Feature_D3D1XDisplayable { // 420_OPAQUE doesn't exist in D3D12.
Desc12.Format = DXGI_FORMAT_NV12; } HRESULT hr = S_OK; /* Tile pool path: no resource is created. The pool records one allocation sized to Desc12.Width, and a heap is created up front on every tiled-resources tier except TIER_1. */ if (Flags11.MiscFlags & D3D11_RESOURCE_MISC_TILE_POOL) { HeapDesc.Flags &= ~D3D12_HEAP_FLAG_CREATE_NOT_RESIDENT; auto TiledResourcesTier = m_pParent->GetCaps().TiledResourcesTier; const UINT InitialPoolSize = UINT(Desc12.Width); m_TilePool.m_Allocations.emplace_back(InitialPoolSize, 0); // throw( bad_alloc ) auto& Allocation = m_TilePool.m_Allocations.front(); // D3D11 now blocks shared tile pools assert(!m_creationArgs.IsShared()); m_pParent->TryAllocateResourceWithFallback([&]() { if (TiledResourcesTier == D3D12_TILED_RESOURCES_TIER_1) { // Nothing to do here - heaps are lazily instantiated depending on what is mapped into them } else { hr = m_pParent->m_pDevice12->CreateHeap( &HeapDesc, IID_PPV_ARGS(&Allocation.m_spUnderlyingBufferHeap)); } assert(hr != E_INVALIDARG); ThrowFailure(hr); // throw( _com_error ) }, threadingContext); } else { /* Resource path: shared, tiled (reserved) or committed resource, chosen inside the allocation callback. */ bool bTiledResource = !!(Flags11.MiscFlags & D3D11_RESOURCE_MISC_TILED); // All sharing must use this API in order to enable in-API sharing bool bCompatibilityCreateRequired = m_creationArgs.IsShared(); // True if simultaneous access will pass D3D12 validation bool bSupportsSimultaneousAccess = (Flags11.BindFlags & D3D11_BIND_DEPTH_STENCIL) == 0 && Desc12.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER && Desc12.SampleDesc.Count == 1 && Desc12.SampleDesc.Quality == 0; unique_comptr spSwapChainAssistant; if (HeapDesc.Flags & D3D12_HEAP_FLAG_ALLOW_DISPLAY) { // We need a swapchain assistant in order for D3D12 to track writes to this // resource and insert written primary references, to guarantee synchronization // against scanout. // This class doesn't need to do anything other than be present.
spSwapChainAssistant.reset(new SwapChainAssistant); /* Displayable resources opt out of residency management. */ m_creationArgs.m_bManageResidency = false; } /* Shared or unmanaged resources must be created resident. */ if (bCompatibilityCreateRequired || !m_creationArgs.m_bManageResidency) { HeapDesc.Flags &= ~D3D12_HEAP_FLAG_CREATE_NOT_RESIDENT; } m_pParent->TryAllocateResourceWithFallback([&]() { /* Creation strategy in priority order: caller-supplied private create function, compat-device shared resource, reserved (tiled) resource, committed resource. NOTE(review): the private create path does not assign hr, so failures must be reported by m_PrivateCreateFn itself (presumably by throwing) - confirm. */ if (m_creationArgs.m_PrivateCreateFn) { m_creationArgs.m_PrivateCreateFn(m_creationArgs, spSwapChainAssistant.get(), &m_Identity->m_spUnderlyingResource); } else if (bCompatibilityCreateRequired) { // Shared resources should use simultaneous access whenever possible // For future designs: Shared resources that cannot use simultaneous access should, in theory, transition to COMMON at every flush Desc12.Flags |= (bSupportsSimultaneousAccess && !(Flags11.MiscFlags & 0x200000) // Feature_D3D1XDisplayable : RESOURCE_MISC_SHARED_EXCLUSIVE_WRITER ) ? D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS : D3D12_RESOURCE_FLAG_NONE ; assert(!bTiledResource); assert((HeapDesc.Flags & D3D12_HEAP_FLAG_SHARED) != 0); D3D12_COMPATIBILITY_SHARED_FLAGS CompatFlags = ((m_creationArgs.IsNTHandleShared()) ? D3D12_COMPATIBILITY_SHARED_FLAG_NONE : D3D12_COMPATIBILITY_SHARED_FLAG_NON_NT_HANDLE) | ((m_creationArgs.m_flags11.MiscFlags & D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX) ? D3D12_COMPATIBILITY_SHARED_FLAG_KEYED_MUTEX : D3D12_COMPATIBILITY_SHARED_FLAG_NONE) | (m_creationArgs.m_bIsD3D9on12Resource && !(HeapDesc.Flags & D3D12_HEAP_FLAG_SHARED_CROSS_ADAPTER) ?
D3D12_COMPATIBILITY_SHARED_FLAG_9_ON_12 : D3D12_COMPATIBILITY_SHARED_FLAG_NONE); hr = m_pParent->m_pCompatDevice->CreateSharedResource( &HeapDesc.Properties, HeapDesc.Flags, &Desc12, D3D12_RESOURCE_STATE_COMMON, nullptr, // Clear values &Flags11, CompatFlags, nullptr, spSwapChainAssistant.get(), IID_PPV_ARGS(&m_Identity->m_spUnderlyingResource)); } else if (bTiledResource) { // Tiled resources should use simultaneous access if aliasing is valid by the D3D11 spec (non-RTV/non-DSV) bool bDesiresSimultaneousAccess = (Flags11.BindFlags & (D3D11_BIND_RENDER_TARGET | D3D11_BIND_DEPTH_STENCIL)) == 0; Desc12.Flags |= (bSupportsSimultaneousAccess && bDesiresSimultaneousAccess) ? D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS : D3D12_RESOURCE_FLAG_NONE; hr = m_pParent->m_pDevice12->CreateReservedResource( &Desc12, D3D12_RESOURCE_STATE_COMMON, nullptr, // Clear values IID_PPV_ARGS(&m_Identity->m_spUnderlyingResource)); } else { hr = m_pParent->m_pDevice12->CreateCommittedResource( &HeapDesc.Properties, HeapDesc.Flags, &Desc12, D3D12_RESOURCE_STATE_COMMON, nullptr, // Clear values IID_PPV_ARGS(&m_Identity->m_spUnderlyingResource)); } /* E_INVALIDARG is only tolerated by the assert when bind flag 0x800 is set; all failures are then thrown. */ [[maybe_unused]] const UINT D3D11_BIND_CAPTURE = 0x800; assert(hr != E_INVALIDARG || (Flags11.BindFlags & D3D11_BIND_CAPTURE)); ThrowFailure(hr); // throw( _com_error ) }, threadingContext); /* Only plain committed resources with managed residency are registered; the initial residency state mirrors the absence of CREATE_NOT_RESIDENT. */ if (!bCompatibilityCreateRequired && !bTiledResource && m_creationArgs.m_bManageResidency) { AddToResidencyManager((HeapDesc.Flags & D3D12_HEAP_FLAG_CREATE_NOT_RESIDENT) == D3D12_HEAP_FLAG_NONE); } if (bTiledResource) { InitializeTilingData(); } } } /* Returns the residency-managed object of this resource, or nullptr when there is no identity or no residency handle. */ ManagedObject *Resource::GetResidencyHandle() { ManagedObject *pObject = nullptr; if (m_Identity && m_Identity->m_pResidencyHandle) { pObject = &m_Identity->m_pResidencyHandle->GetManagedObject(); } return pObject; } /* Records that this resource is referenced by command list `id` of the given type: adds it to the residency set, records the use (once per type/id pair) for identities with restricted outstanding resources, and forwards to DeviceChild::UsedInCommandList. */ void Resource::UsedInCommandList(COMMAND_LIST_TYPE commandListType, UINT64 id) { assert(commandListType != COMMAND_LIST_TYPE::UNKNOWN); m_pParent->AddObjectToResidencySet(this, commandListType); if
(m_Identity && m_Identity->HasRestrictedOutstandingResources()) { OutstandingResourceUse resourceUse(commandListType, id); // Search existing references to see if we've used it in this command list before. // Can't just check the back because it might be used simultaneously in multiple // command list types. // TODO: Is this really the right place for it? if (std::find(m_Identity->m_OutstandingResources.begin(), m_Identity->m_OutstandingResources.end(), resourceUse) == m_Identity->m_OutstandingResources.end()) { m_Identity->m_OutstandingResources.push_back(resourceUse); } } DeviceChild::UsedInCommandList(commandListType, id); } //---------------------------------------------------------------------------------------------------------------------------------- /* Resets state tracking after the underlying resource was created or replaced. Returns immediately when the identity has no resource (tile pools). Otherwise the current state is reset; resources that do not own their underlying resource are additionally placed in the default state of their allocator pool, on the video decode queue for the decoder heap type and the graphics queue otherwise. Finally the last-used command list tracking and the desired state are reset. */ void Resource::UnderlyingResourceChanged() noexcept(false) { // Tile pools don't have a resource if (!m_Identity->GetResource()) { return; } m_Identity->m_currentState.Reset(); // For resources which own their underlying subresource, they're created in COMMON. if (!m_Identity->m_bOwnsUnderlyingResource) { CCurrentResourceState::ExclusiveState State = { 0ull, GetDefaultPoolState(GetAllocatorHeapType()), GetAllocatorHeapType() == AllocatorHeapType::Decoder ?
COMMAND_LIST_TYPE::VIDEO_DECODE : COMMAND_LIST_TYPE::GRAPHICS }; m_Identity->m_currentState.SetExclusiveResourceState(State); } ResetLastUsedInCommandList(); m_DesiredState.Reset(); } //---------------------------------------------------------------------------------------------------------------------------------- /* Zeroes the padding between the app-visible width and the aligned row pitch of a constant buffer that does not own its underlying resource. Does nothing for resources without the constant-buffer bind flag, for widths already a multiple of D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, or for owned resources. The buffer is mapped with an empty read range and unmapped with the zeroed range as the written range; a failed Map is silently ignored. */ void Resource::ZeroConstantBufferPadding() noexcept { // Determine if we need to do any work if ((AppDesc()->BindFlags() & RESOURCE_BIND_CONSTANT_BUFFER) == 0 || // The only buffers that are bloated AppDesc()->Width() % D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT == 0 || // Only if it's actually bloated m_Identity->m_bOwnsUnderlyingResource) // Default constant buffers are handled separately { return; } UINT64 AlignedSize = m_SubresourcePlacement[0].Footprint.RowPitch; SIZE_T ZeroSize = static_cast(AlignedSize - AppDesc()->Width()); void *pData; CD3DX12_RANGE ReadRange(0, 0); HRESULT hr = GetUnderlyingResource()->Map(0, &ReadRange, &pData); if (SUCCEEDED(hr)) { BYTE* pZeroAddr = reinterpret_cast(pData)+m_SubresourcePlacement[0].Offset + AppDesc()->Width(); ZeroMemory(pZeroAddr, ZeroSize); CD3DX12_RANGE WrittenRange(SIZE_T(m_SubresourcePlacement[0].Offset) + AppDesc()->Width(), 0); WrittenRange.End = WrittenRange.Begin + ZeroSize; GetUnderlyingResource()->Unmap(0, &WrittenRange); } } //---------------------------------------------------------------------------------------------------------------------------------- /* Returns a mutable reference to the placed footprint of the given subresource; the index is not range-checked here. */ D3D12_PLACED_SUBRESOURCE_FOOTPRINT& Resource::GetSubresourcePlacement(UINT subresource) noexcept { return m_SubresourcePlacement[subresource]; } /* Replaces the stored app desc and recomputes all subresource placements from it. */ void Resource::UpdateAppDesc(const AppResourceDesc &AppDesc) { m_creationArgs.m_appDesc = AppDesc; InitializeSubresourceDescs(); } //---------------------------------------------------------------------------------------------------------------------------------- /* Computes m_SubresourcePlacement for every subresource from the app desc: footprints come from GetCopyableFootprints, are optionally repacked tightly, and offsets are then laid out in Array -> Plane -> Mip order. Throws E_INVALIDARG through ThrowFailure when the first footprint's RowPitch is the all-ones value. */ void Resource::InitializeSubresourceDescs() noexcept(false) { m_Identity->m_bPlacedTexture = m_creationArgs.m_isPlacedTexture;
/* Describe the resource with sample count 1 / quality 0 and no flags; buffers use the row-major layout, textures the unknown layout. */ CD3DX12_RESOURCE_DESC ResDesc( AppDesc()->ResourceDimension(), 0, AppDesc()->Width(), AppDesc()->Height(), static_cast(AppDesc()->ResourceDimension() == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? AppDesc()->Depth() : AppDesc()->ArraySize()), AppDesc()->MipLevels(), AppDesc()->Format(), 1, 0, AppDesc()->ResourceDimension() == D3D12_RESOURCE_DIMENSION_BUFFER ? D3D12_TEXTURE_LAYOUT_ROW_MAJOR : D3D12_TEXTURE_LAYOUT_UNKNOWN, D3D12_RESOURCE_FLAG_NONE); ID3D12Device* pDevice = m_pParent->m_pDevice12.get(); pDevice->GetCopyableFootprints(&ResDesc, 0, NumSubresources(), 0, &m_SubresourcePlacement[0], nullptr, nullptr, nullptr); // If unrestricted pitch is supported, we can use tighter packing for non-planar resources // instead of the default 256-byte alignment that GetCopyableFootprints applies if (m_pParent->GetOptions13().UnrestrictedBufferTextureCopyPitchSupported && AppDesc()->NonOpaquePlaneCount() == 1) { /* NOTE(review): m_totalSize is a local running offset despite its member-style prefix. */ UINT64 m_totalSize = 0; for (UINT subresourceIndex = 0; subresourceIndex < NumSubresources(); subresourceIndex++) { auto& footprint = m_SubresourcePlacement[subresourceIndex]; UINT minPitch = 0; CD3D11FormatHelper::CalculateMinimumRowMajorRowPitch(footprint.Footprint.Format, footprint.Footprint.Width, minPitch); footprint.Footprint.RowPitch = minPitch; footprint.Offset = m_totalSize; UINT slicePitch = 0; CD3D11FormatHelper::CalculateMinimumRowMajorSlicePitch(footprint.Footprint.Format, footprint.Footprint.RowPitch, footprint.Footprint.Height, slicePitch); /* NOTE(review): slicePitch is UINT, so this product is presumably evaluated in 32 bits before widening to UINT64 - confirm it cannot overflow for large 3D textures. */ m_totalSize += slicePitch * footprint.Footprint.Depth; } } /* RowPitch is unsigned, so the comparison tests for the all-ones value. NOTE(review): presumably the failure marker written by GetCopyableFootprints - confirm; when the tight-packing branch above ran, RowPitch has already been overwritten. */ if (m_SubresourcePlacement[0].Footprint.RowPitch == -1) { ThrowFailure(E_INVALIDARG); } if (NumSubresources() > 1) { // Offset produced by GetCopyableFootprints is ignored, because 11on12 chooses a different layout in the buffer than D3D12 // Subresource order in D3D12 is Plane -> Array -> Mip // However, for CPU-accessible data, the desired memory layout is Array -> Plane -> Mip UINT LastSubresource = 0; const UINT ArraySize = AppDesc()->ArraySize(), PlaneCount =
AppDesc()->NonOpaquePlaneCount() * m_SubresourceMultiplier, MipLevels = AppDesc()->MipLevels(); for (UINT ArraySlice = 0; ArraySlice < ArraySize; ++ArraySlice) { for (UINT PlaneSlice = 0; PlaneSlice < PlaneCount; ++PlaneSlice) { for (UINT MipLevel = 0; MipLevel < MipLevels; ++MipLevel) { auto& LastPlacement = GetSubresourcePlacement(LastSubresource); UINT CurrentSubresource = ComposeSubresourceIdxExtended(MipLevel, ArraySlice, PlaneSlice, MipLevels, ArraySize); if (CurrentSubresource != 0) { GetSubresourcePlacement(CurrentSubresource).Offset = Align(LastPlacement.Offset + DepthPitch(LastSubresource) * LastPlacement.Footprint.Depth, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT); } LastSubresource = CurrentSubresource; } } } } else if(!m_pParent->GetOptions13().UnrestrictedBufferTextureCopyPitchSupported || AppDesc()->ResourceDimension() == D3D12_RESOURCE_DIMENSION_BUFFER) { // D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT is used to ensure that constant buffers are multiples of 256 bytes in size m_SubresourcePlacement[0].Footprint.RowPitch = Align(m_SubresourcePlacement[0].Footprint.RowPitch, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); } } void Resource::FillSubresourceDesc(ID3D12Device* pDevice, bool supportsUnrestrictedBufferTextureCopyPitch, DXGI_FORMAT Format, UINT PlaneWidth, UINT PlaneHeight, UINT Depth, _Out_ D3D12_PLACED_SUBRESOURCE_FOOTPRINT& Placement) noexcept { CD3DX12_RESOURCE_DESC ResourceDesc( D3D12_RESOURCE_DIMENSION_TEXTURE2D, 0, // Alignment PlaneWidth, PlaneHeight, static_cast(Depth), 1, // Array size Format, 1, // Sample count 0, // Sample quality D3D12_TEXTURE_LAYOUT_UNKNOWN, D3D12_RESOURCE_FLAG_NONE ); if (Format == DXGI_FORMAT_UNKNOWN) { ResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; ResourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; } else if (Depth > 1) { ResourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D; } pDevice->GetCopyableFootprints(&ResourceDesc, 0, 1, 0, &Placement, nullptr, nullptr, nullptr); // 
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT is used to ensure that constant buffers are multiples of 256 bytes in size if (!supportsUnrestrictedBufferTextureCopyPitch || ResourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { Placement.Footprint.RowPitch = Align(Placement.Footprint.RowPitch, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT); } } //---------------------------------------------------------------------------------------------------------------------------------- UINT Resource::DepthPitch(UINT Subresource) noexcept { auto& Placement = GetSubresourcePlacement(Subresource).Footprint; assert(Placement.Width > 0 && Placement.Height > 0 && Placement.Depth > 0 && Placement.RowPitch > 0); // The placement format should be in the same cast set as the resource format unless the format is planar. // Also, DepthStencil formats are planar in 12 only and need to be checked seperately. assert(m_pParent->GetParentForFormat(AppDesc()->Format()) == CD3D11FormatHelper::GetParentFormat(Placement.Format) || CD3D11FormatHelper::Planar(m_pParent->GetParentForFormat(AppDesc()->Format())) || CD3D11FormatHelper::FamilySupportsStencil(m_pParent->GetParentForFormat(AppDesc()->Format()))); UINT SlicePitch; CD3D11FormatHelper::CalculateMinimumRowMajorSlicePitch(Placement.Format, Placement.RowPitch, Placement.Height, SlicePitch); return SlicePitch; } //---------------------------------------------------------------------------------------------------------------------------------- D3D12_RANGE Resource::GetSubresourceRange(UINT Subresource, _In_opt_ const D3D12_BOX *pSelectedBox) noexcept { auto& SubresourceInfo = GetSubresourcePlacement(Subresource); UINT TightRowPitch; CD3D11FormatHelper::CalculateMinimumRowMajorRowPitch(SubresourceInfo.Footprint.Format, SubresourceInfo.Footprint.Width, TightRowPitch); SIZE_T StartOffset = SIZE_T(SubresourceInfo.Offset); SIZE_T Size = DepthPitch(Subresource) * (SubresourceInfo.Footprint.Depth - 1) + TightRowPitch; if (pSelectedBox) { if 
(Parent()->m_desc12.Format == DXGI_FORMAT_UNKNOWN) { assert(Parent()->m_desc12.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER); assert(Subresource == 0); StartOffset += pSelectedBox->left; Size = pSelectedBox->right - pSelectedBox->left; } else { const UINT BITS_PER_BYTE = 8; const UINT BytesPerPixel = CD3D11FormatHelper::GetBitsPerUnit(Parent()->m_desc12.Format) / BITS_PER_BYTE; const UINT SlicePitch = SubresourceInfo.Footprint.Height * SubresourceInfo.Footprint.RowPitch; StartOffset += pSelectedBox->left * BytesPerPixel + pSelectedBox->top * SubresourceInfo.Footprint.RowPitch + pSelectedBox->front * SlicePitch; UINT DepthSize = (pSelectedBox->back - pSelectedBox->front - 1) * DepthPitch(Subresource); UINT HeightSize = (pSelectedBox->bottom - pSelectedBox->top - 1) * SubresourceInfo.Footprint.RowPitch; UINT RowSize = (pSelectedBox->right - pSelectedBox->left) * BytesPerPixel; Size = DepthSize + HeightSize + RowSize; } } return CD3DX12_RANGE(StartOffset, StartOffset + Size); } //---------------------------------------------------------------------------------------------------------------------------------- UINT64 Resource::GetResourceSize() noexcept { UINT Subresource = NumSubresources() - 1; auto& LastSubresourcePlacement = GetSubresourcePlacement(Subresource); UINT64 TotalSize = DepthPitch(Subresource) * LastSubresourcePlacement.Footprint.Depth + (LastSubresourcePlacement.Offset - m_SubresourcePlacement[0].Offset); return TotalSize; } //---------------------------------------------------------------------------------------------------------------------------------- D3D12_HEAP_TYPE Resource::GetD3D12HeapType(RESOURCE_USAGE usage, UINT cpuAccessFlags) noexcept { if (usage == RESOURCE_USAGE_DEFAULT || usage == RESOURCE_USAGE_IMMUTABLE) { return D3D12_HEAP_TYPE_DEFAULT; } else { assert(usage == RESOURCE_USAGE_DYNAMIC || usage == RESOURCE_USAGE_STAGING); // Using GetCustomHeapProperties with Readback heaps allows for both reading // and writing to the the heap so 
choose this heap if the cpu access is read, // regardless of whether there's also CPU write access if (cpuAccessFlags & RESOURCE_CPU_ACCESS_READ) return D3D12_HEAP_TYPE_READBACK; if (cpuAccessFlags & RESOURCE_CPU_ACCESS_WRITE) return D3D12_HEAP_TYPE_UPLOAD; return D3D12_HEAP_TYPE_DEFAULT; } } // This should only ever be called by 9on12 because of how the 9 runtime // doesn't unbind things before it deletes them. //---------------------------------------------------------------------------------------------------------------------------------- void Resource::ClearInputBindings() { UnBindAsCBV(); UnBindAsSRV(); UnBindAsVB(); UnBindAsIB(); } //---------------------------------------------------------------------------------------------------------------------------------- void Resource::ClearOutputBindings() { UnBindAsRTV(); UnBindAsDSV(); } //---------------------------------------------------------------------------------------------------------------------------------- void Resource::UnBindAsRTV() { if (m_currentBindings.IsBoundAsRenderTarget()) { auto unbindLambda = [=](UINT, UINT slot) { m_pParent->ClearRTVBinding(slot); }; UnbindList(m_currentBindings.GetRenderTargetList(), unbindLambda); } } //---------------------------------------------------------------------------------------------------------------------------------- void Resource::UnBindAsDSV() { if (m_currentBindings.IsBoundAsDepthStencil()) { m_pParent->ClearDSVBinding(); } } //---------------------------------------------------------------------------------------------------------------------------------- void Resource::UnBindAsCBV() { Resource *nullCB[] = { nullptr }; for (UINT stage = 0; stage < ShaderStageCount; ++stage) { auto& bindings = m_currentBindings.m_ConstantBufferBindings[stage]; for (UINT slot = 0; bindings.any(); ++slot) { if (!bindings.test(slot)) { continue; } switch (stage) { case e_PS: m_pParent->SetConstantBuffers(slot, 1, nullCB, nullptr, nullptr); break; case e_VS: 
m_pParent->SetConstantBuffers(slot, 1, nullCB, nullptr, nullptr); break; case e_GS: m_pParent->SetConstantBuffers(slot, 1, nullCB, nullptr, nullptr); break; case e_HS: m_pParent->SetConstantBuffers(slot, 1, nullCB, nullptr, nullptr); break; case e_DS: m_pParent->SetConstantBuffers(slot, 1, nullCB, nullptr, nullptr); break; case e_CS: m_pParent->SetConstantBuffers(slot, 1, nullCB, nullptr, nullptr); break; default: assert(false); } } } } //---------------------------------------------------------------------------------------------------------------------------------- void Resource::UnBindAsSRV() { SRV *pNullSRV = nullptr; if (m_currentBindings.IsBoundAsShaderResource()) { auto unbindLambda = [=](UINT stage, UINT slot) { switch (stage) { case e_PS: m_pParent->SetShaderResources(slot, 1, &pNullSRV); break; case e_VS: m_pParent->SetShaderResources(slot, 1, &pNullSRV); break; case e_GS: m_pParent->SetShaderResources(slot, 1, &pNullSRV); break; case e_HS: m_pParent->SetShaderResources(slot, 1, &pNullSRV); break; case e_DS: m_pParent->SetShaderResources(slot, 1, &pNullSRV); break; case e_CS: m_pParent->SetShaderResources(slot, 1, &pNullSRV); break; default: assert(false); } }; UnbindList(m_currentBindings.GetShaderResourceList(), unbindLambda); } } //---------------------------------------------------------------------------------------------------------------------------------- void Resource::UnBindAsVB() { UINT& vbBindings = m_currentBindings.GetVertexBufferBindings(); if (vbBindings) { for (UINT8 i = 0; i < D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT; i++) { UINT32 bit = (1 << i); if (vbBindings & bit) { m_pParent->ClearVBBinding(i); vbBindings &= ~(bit); } if (vbBindings == 0) { break; } } } } //---------------------------------------------------------------------------------------------------------------------------------- void Resource::UnBindAsIB() { if (m_currentBindings.IsBoundAsIndexBuffer()) { m_pParent->IaSetIndexBuffer(nullptr, DXGI_FORMAT_UNKNOWN, 0); } } 
//---------------------------------------------------------------------------------------------------------------------------------- bool Resource::WaitForOutstandingResourcesIfNeeded(bool DoNotWait) { while (m_Identity && m_Identity->m_OutstandingResources.size() >= (size_t)m_Identity->m_MaxOutstandingResources) { auto outstandingResourceUse = m_Identity->m_OutstandingResources.front(); if (!m_pParent->WaitForFenceValue(outstandingResourceUse.commandListType, outstandingResourceUse.fenceValue, DoNotWait)) // throws { return false; } m_Identity->m_OutstandingResources.erase(m_Identity->m_OutstandingResources.begin()); } return true; } //---------------------------------------------------------------------------------------------------------------------------------- Resource* Resource::GetCurrentCpuHeap(UINT Subresource) { if (m_spCurrentCpuHeaps.size() == 0) { return nullptr; } UINT DynamicTextureIndex = GetDynamicTextureIndex(Subresource); return m_spCurrentCpuHeaps[DynamicTextureIndex].get(); } //---------------------------------------------------------------------------------------------------------------------------------- void Resource::SetCurrentCpuHeap(UINT subresource, Resource* UploadHeap) { UINT DynamicTextureIndex = GetDynamicTextureIndex(subresource); m_spCurrentCpuHeaps[DynamicTextureIndex] = UploadHeap; } //---------------------------------------------------------------------------------------------------------------------------------- HRESULT Resource::AddFenceForUnwrapResidency(ID3D12CommandQueue* pQueue) { try { if (m_UnwrapUnderlyingResidencyDeferredWait.fence.get() == nullptr) { m_UnwrapUnderlyingResidencyDeferredWait.fence = std::make_shared(m_pParent, FENCE_FLAG_NONE, 0); // throw (_com_error, bad_alloc) m_UnwrapUnderlyingResidencyDeferredWait.value = 0; } ++m_UnwrapUnderlyingResidencyDeferredWait.value; pQueue->Signal(m_UnwrapUnderlyingResidencyDeferredWait.fence->Get(), m_UnwrapUnderlyingResidencyDeferredWait.value); 
m_ResourceDeferredWaits.push_back(m_UnwrapUnderlyingResidencyDeferredWait); } catch( _com_error& hrEx ) { return hrEx.Error(); } catch(std::bad_alloc&) { return E_OUTOFMEMORY; } return S_OK; } }; ================================================ FILE: src/ResourceBinding.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #include "pch.h" namespace D3D12TranslationLayer { //================================================================================================================================== // CResourceBindings // The inverse of CContext::m_PState, allows fast lookups for bind points of a given resource // Used to determine resource/subresource state, as well as for rebinding resources on discard/LOD change //================================================================================================================================== CResourceBindings::CResourceBindings(UINT SubresourceCount, UINT BindFlags, void*& pPreallocatedArray) noexcept : m_BindFlags(BindFlags) , m_NumViewsReferencingSubresources(0) , m_VertexBufferBindings(0) , m_StreamOutBindings(0) , m_bIsDepthStencilViewBound(false) , m_bIsIndexBufferBound(false) , m_SubresourceBindings(SubresourceCount, pPreallocatedArray) { D3D12TranslationLayer::InitializeListHead(&m_ShaderResourceViewList); D3D12TranslationLayer::InitializeListHead(&m_RenderTargetViewList); D3D12TranslationLayer::InitializeListHead(&m_UnorderedAccessViewList); } //---------------------------------------------------------------------------------------------------------------------------------- CResourceBindings::~CResourceBindings() { assert(D3D12TranslationLayer::IsListEmpty(&m_ShaderResourceViewList)); assert(D3D12TranslationLayer::IsListEmpty(&m_RenderTargetViewList)); assert(D3D12TranslationLayer::IsListEmpty(&m_UnorderedAccessViewList)); } 
//---------------------------------------------------------------------------------------------------------------------------------- void CResourceBindings::ViewBoundCommon(CViewSubresourceSubset& viewSubresources, void (CSubresourceBindings::*pfnBound)()) { if (!viewSubresources.IsWholeResource()) { ++m_NumViewsReferencingSubresources; } for (auto subresources : viewSubresources) { for (UINT i = subresources.first; i < subresources.second; ++i) { (m_SubresourceBindings[i].*pfnBound)(); } } } //---------------------------------------------------------------------------------------------------------------------------------- void CResourceBindings::ViewUnboundCommon(CViewSubresourceSubset& viewSubresources, void (CSubresourceBindings::*pfnUnbound)()) { if (!viewSubresources.IsWholeResource()) { --m_NumViewsReferencingSubresources; } for (auto subresources : viewSubresources) { for (UINT i = subresources.first; i < subresources.second; ++i) { (m_SubresourceBindings[i].*pfnUnbound)(); } } } //---------------------------------------------------------------------------------------------------------------------------------- void CResourceBindings::VertexBufferBound(UINT slot) { assert(slot < D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT); UINT slotBit = (1 << slot); assert((m_VertexBufferBindings & slotBit) == 0); m_VertexBufferBindings |= slotBit; } //---------------------------------------------------------------------------------------------------------------------------------- void CResourceBindings::StreamOutputBufferBound(UINT slot) { assert(slot < D3D11_SO_STREAM_COUNT); UINT slotBit = (1 << slot); assert((m_StreamOutBindings & slotBit) == 0); m_StreamOutBindings |= slotBit; } //---------------------------------------------------------------------------------------------------------------------------------- void CResourceBindings::ConstantBufferBound(EShaderStage stage, UINT slot) { m_ConstantBufferBindings[stage].set(slot); m_ConstantBufferBindRefs++; } 
//---------------------------------------------------------------------------------------------------------------------------------- void CResourceBindings::IndexBufferBound() { assert(!m_bIsIndexBufferBound); m_bIsIndexBufferBound = true; } //---------------------------------------------------------------------------------------------------------------------------------- void CResourceBindings::VertexBufferUnbound(UINT slot) { assert(slot < D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT); UINT slotBit = (1 << slot); assert((m_VertexBufferBindings & slotBit) == slotBit); m_VertexBufferBindings &= ~slotBit; } //---------------------------------------------------------------------------------------------------------------------------------- void CResourceBindings::StreamOutputBufferUnbound(UINT slot) { assert(slot < D3D11_SO_STREAM_COUNT); UINT slotBit = (1 << slot); assert((m_StreamOutBindings & slotBit) == slotBit); m_StreamOutBindings &= ~slotBit; } //---------------------------------------------------------------------------------------------------------------------------------- void CResourceBindings::ConstantBufferUnbound(EShaderStage stage, UINT slot) { assert(m_ConstantBufferBindRefs > 0 && m_ConstantBufferBindings[stage][slot]); m_ConstantBufferBindRefs--; m_ConstantBufferBindings[stage].set(slot, false); } //---------------------------------------------------------------------------------------------------------------------------------- void CResourceBindings::IndexBufferUnbound() { assert(m_bIsIndexBufferBound); m_bIsIndexBufferBound = false; } //---------------------------------------------------------------------------------------------------------------------------------- D3D12_RESOURCE_STATES CResourceBindings::GetD3D12ResourceUsageFromBindings(UINT subresource) const { const CSubresourceBindings& S = m_SubresourceBindings[subresource]; D3D12_RESOURCE_STATES BufferUsage = D3D12_RESOURCE_STATE_COMMON; if (subresource == 0 && m_SubresourceBindings.size() == 
1) { BufferUsage = (IsBoundAsVertexBuffer() || IsBoundAsConstantBuffer() ? D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER : D3D12_RESOURCE_STATE_COMMON) | (IsBoundAsIndexBuffer() ? D3D12_RESOURCE_STATE_INDEX_BUFFER : D3D12_RESOURCE_STATE_COMMON) | (IsBoundAsStreamOut() ? D3D12_RESOURCE_STATE_STREAM_OUT : D3D12_RESOURCE_STATE_COMMON); // If a buffer has the STREAM_OUTPUT bind flag // and is bound as a VB, then add the IndirectArgument resource usage // This is needed because DrawAuto is converted to DrawIndirect // and the indirect arguments are at the end of the buffer if (IsBoundAsVertexBuffer() && (m_BindFlags & D3D11_BIND_STREAM_OUTPUT)) { BufferUsage |= D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; } } assert(!(S.IsBoundAsWritableDepth() && S.IsBoundAsReadOnlyDepth())); D3D12_RESOURCE_STATES Usage = BufferUsage | (S.IsBoundAsPixelShaderResource() ? D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE : D3D12_RESOURCE_STATE_COMMON) | (S.IsBoundAsNonPixelShaderResource() ? D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE : D3D12_RESOURCE_STATE_COMMON) | (S.IsBoundAsRenderTarget() ? D3D12_RESOURCE_STATE_RENDER_TARGET : D3D12_RESOURCE_STATE_COMMON) | (S.IsBoundAsUnorderedAccess() ? D3D12_RESOURCE_STATE_UNORDERED_ACCESS : D3D12_RESOURCE_STATE_COMMON) | (S.IsBoundAsWritableDepth() ? D3D12_RESOURCE_STATE_DEPTH_WRITE : D3D12_RESOURCE_STATE_COMMON) | (S.IsBoundAsReadOnlyDepth() ? D3D12_RESOURCE_STATE_DEPTH_READ : D3D12_RESOURCE_STATE_COMMON); return Usage ? 
Usage : UNKNOWN_RESOURCE_STATE; } //---------------------------------------------------------------------------------------------------------------------------------- COMMAND_LIST_TYPE CResourceBindings::GetCommandListTypeFromBindings() const { if (m_BindFlags & D3D11_BIND_DECODER) { return COMMAND_LIST_TYPE::VIDEO_DECODE; } else { return COMMAND_LIST_TYPE::GRAPHICS; } } }; ================================================ FILE: src/ResourceCache.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #include "pch.h" namespace D3D12TranslationLayer { ResourceCache::ResourceCache(ImmediateContext &device) : m_device(device) { }; ResourceCacheEntry const& ResourceCache::GetResource(DXGI_FORMAT format, UINT width, UINT height, DXGI_FORMAT viewFormat) { ResourceCacheEntry& CacheEntry = m_Cache[format]; if (CacheEntry.m_Resource) { // If the resource is bigger than the clear resource we have cached, reallocate a bigger one. // Note that bigger is better in this case because we can also use large resources for copying clear // color into smaller resources as well auto pAppDesc = CacheEntry.m_Resource->AppDesc(); if (pAppDesc->Width() < width || pAppDesc->Height() < height) { width = max(width, pAppDesc->Width()); height = max(height, pAppDesc->Height()); CacheEntry = ResourceCacheEntry{}; } } // The D3D12 runtime also defaults DXGI_FORMAT_UNKNOWN to the resource format of the texture when creating a view. viewFormat = viewFormat == DXGI_FORMAT_UNKNOWN ? 
format : viewFormat; if (!CacheEntry.m_Resource) { ResourceCreationArgs createArg = {}; createArg.m_appDesc = AppResourceDesc(1, // SubresourcesPerPlane (UINT8)CD3D11FormatHelper::NonOpaquePlaneCount(format), //PlaneCount CD3D11FormatHelper::NonOpaquePlaneCount(format), //SubresourceCount 1, // Mips 1, // ArraySize 1, // Depth width, height, format, 1, 0, // SampleDesc RESOURCE_USAGE_DEFAULT, (RESOURCE_CPU_ACCESS)0, // CPUAccess (RESOURCE_BIND_FLAGS)(RESOURCE_BIND_RENDER_TARGET | RESOURCE_BIND_SHADER_RESOURCE), D3D12_RESOURCE_DIMENSION_TEXTURE2D); createArg.m_desc12 = CD3DX12_RESOURCE_DESC::Tex2D(format, width, height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); createArg.m_heapDesc = CD3DX12_HEAP_DESC(0, D3D12_HEAP_TYPE_DEFAULT); createArg.m_flags11.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE; createArg.m_bManageResidency = true; // TODO: Pass down the clear color to D3D12's create resource CacheEntry.m_Resource = Resource::CreateResource(&m_device, createArg, ResourceAllocationContext::ImmediateContextThreadLongLived); } if ( !CacheEntry.m_RTV || !(CacheEntry.m_RTV->GetDesc12().Format == viewFormat)) { D3D12_RENDER_TARGET_VIEW_DESC RTVDesc = {}; RTVDesc.Format = viewFormat; RTVDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; RTVDesc.Texture2D.MipSlice = 0; RTVDesc.Texture2D.PlaneSlice = 0; CacheEntry.m_RTV.reset(new RTV(&m_device, RTVDesc, *CacheEntry.m_Resource)); } if ( !CacheEntry.m_SRV || !(CacheEntry.m_SRV->GetDesc12().Format == viewFormat)) { D3D12_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; SRVDesc.Format = viewFormat; SRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; SRVDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; SRVDesc.Texture2D.MipLevels = 1; SRVDesc.Texture2D.MostDetailedMip = 0; SRVDesc.Texture2D.PlaneSlice = 0; SRVDesc.Texture2D.ResourceMinLODClamp = 0.0f; CacheEntry.m_SRV.reset(new SRV(&m_device, SRVDesc, *CacheEntry.m_Resource)); } return CacheEntry; } void 
ResourceCache::TakeCacheEntryOwnership(DXGI_FORMAT format, ResourceCacheEntry& entryOut) { ResourceCacheEntry& CacheEntry = m_Cache[format]; entryOut = std::move(CacheEntry); } } ================================================ FILE: src/ResourceState.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #include "pch.h" namespace D3D12TranslationLayer { //---------------------------------------------------------------------------------------------------------------------------------- auto CDesiredResourceState::GetSubresourceInfo(UINT SubresourceIndex) const noexcept -> SubresourceInfo const& { if (AreAllSubresourcesSame()) { SubresourceIndex = 0; } return m_spSubresourceInfo[SubresourceIndex]; } //---------------------------------------------------------------------------------------------------------------------------------- void CDesiredResourceState::SetResourceState(SubresourceInfo const & Info) noexcept { m_bAllSubresourcesSame = true; m_spSubresourceInfo[0] = Info; } //---------------------------------------------------------------------------------------------------------------------------------- void CDesiredResourceState::SetSubresourceState(UINT SubresourceIndex, SubresourceInfo const & Info) noexcept { if (m_bAllSubresourcesSame && m_spSubresourceInfo.size() > 1) { static_assert(std::extent_v == 1, "Otherwise this fill doesn't work"); std::fill(m_spSubresourceInfo.m_Extra.begin(), m_spSubresourceInfo.m_Extra.end(), m_spSubresourceInfo[0]); m_bAllSubresourcesSame = false; } if (m_spSubresourceInfo.size() == 1) { SubresourceIndex = 0; } m_spSubresourceInfo[SubresourceIndex] = Info; } //---------------------------------------------------------------------------------------------------------------------------------- void CDesiredResourceState::Reset() noexcept { SetResourceState(SubresourceInfo{}); } 
//---------------------------------------------------------------------------------------------------------------------------------- void CCurrentResourceState::ConvertToSubresourceTracking() noexcept { if (m_bAllSubresourcesSame && m_spExclusiveState.size() > 1) { static_assert(std::extent_v == 1, "Otherwise this fill doesn't work"); std::fill(m_spExclusiveState.m_Extra.begin(), m_spExclusiveState.m_Extra.end(), m_spExclusiveState[0]); if (!m_pSharedState.empty()) { static_assert(std::extent_v == 1, "Otherwise this fill doesn't work"); std::fill(m_pSharedState.m_Extra.begin(), m_pSharedState.m_Extra.end(), m_pSharedState[0]); } m_bAllSubresourcesSame = false; } } //---------------------------------------------------------------------------------------------------------------------------------- CCurrentResourceState::CCurrentResourceState(UINT SubresourceCount, bool bSimultaneousAccess, void*& pPreallocatedMemory) noexcept : m_bSimultaneousAccess(bSimultaneousAccess) , m_spExclusiveState(SubresourceCount, pPreallocatedMemory) , m_pSharedState(bSimultaneousAccess ? 
SubresourceCount : 0u, pPreallocatedMemory) { m_spExclusiveState[0] = ExclusiveState{}; if (bSimultaneousAccess) { m_spExclusiveState[0].IsMostRecentlyExclusiveState = false; m_pSharedState[0] = SharedState{}; } } //---------------------------------------------------------------------------------------------------------------------------------- bool CCurrentResourceState::IsExclusiveState(UINT SubresourceIndex) const noexcept { if (!SupportsSimultaneousAccess()) { return true; } if (AreAllSubresourcesSame()) { SubresourceIndex = 0; } return m_spExclusiveState[SubresourceIndex].IsMostRecentlyExclusiveState; } //---------------------------------------------------------------------------------------------------------------------------------- void CCurrentResourceState::SetExclusiveResourceState(ExclusiveState const& State) noexcept { m_bAllSubresourcesSame = true; m_spExclusiveState[0] = State; if (!m_pSharedState.empty()) { m_pSharedState[0] = SharedState{}; } } //---------------------------------------------------------------------------------------------------------------------------------- void CCurrentResourceState::SetSharedResourceState(COMMAND_LIST_TYPE Type, UINT64 FenceValue, D3D12_RESOURCE_STATES State) noexcept { assert(!IsD3D12WriteState(State, SubresourceTransitionFlags::None)); m_bAllSubresourcesSame = true; m_spExclusiveState[0].IsMostRecentlyExclusiveState = false; m_pSharedState[0].FenceValues[(UINT)Type] = FenceValue; m_pSharedState[0].State[(UINT)Type] = State; } //---------------------------------------------------------------------------------------------------------------------------------- void CCurrentResourceState::SetExclusiveSubresourceState(UINT SubresourceIndex, ExclusiveState const& State) noexcept { ConvertToSubresourceTracking(); m_spExclusiveState[SubresourceIndex] = State; if (!m_pSharedState.empty()) { m_pSharedState[SubresourceIndex] = SharedState{}; } } 
//---------------------------------------------------------------------------------------------------------------------------------- void CCurrentResourceState::SetSharedSubresourceState(UINT SubresourceIndex, COMMAND_LIST_TYPE Type, UINT64 FenceValue, D3D12_RESOURCE_STATES State) noexcept { assert(!IsD3D12WriteState(State, SubresourceTransitionFlags::None)); ConvertToSubresourceTracking(); m_spExclusiveState[SubresourceIndex].IsMostRecentlyExclusiveState = false; m_pSharedState[SubresourceIndex].FenceValues[(UINT)Type] = FenceValue; m_pSharedState[SubresourceIndex].State[(UINT)Type] = State; } //---------------------------------------------------------------------------------------------------------------------------------- auto CCurrentResourceState::GetExclusiveSubresourceState(UINT SubresourceIndex) const noexcept -> ExclusiveState const& { if (AreAllSubresourcesSame()) { SubresourceIndex = 0; } return m_spExclusiveState[SubresourceIndex]; } //---------------------------------------------------------------------------------------------------------------------------------- auto CCurrentResourceState::GetSharedSubresourceState(UINT SubresourceIndex) const noexcept -> SharedState const& { assert(!IsExclusiveState(SubresourceIndex)); if (AreAllSubresourcesSame()) { SubresourceIndex = 0; } return m_pSharedState[SubresourceIndex]; } //---------------------------------------------------------------------------------------------------------------------------------- UINT CCurrentResourceState::GetCommandListTypeMask() const noexcept { if (AreAllSubresourcesSame()) { return GetCommandListTypeMask(0); } UINT TypeMask = 0; assert(m_spExclusiveState.size() < MAXUINT); for (UINT i = 0; i < static_cast(m_spExclusiveState.size()); ++i) { TypeMask |= GetCommandListTypeMask(i); } return TypeMask; } //---------------------------------------------------------------------------------------------------------------------------------- UINT 
CCurrentResourceState::GetCommandListTypeMask(CViewSubresourceSubset const & Subresources) const noexcept { if (AreAllSubresourcesSame()) { return GetCommandListTypeMask(0); } UINT TypeMask = 0; for (auto range : Subresources) { for (UINT i = range.first; i < range.second; ++i) { TypeMask |= GetCommandListTypeMask(i); } } return TypeMask; } //---------------------------------------------------------------------------------------------------------------------------------- UINT CCurrentResourceState::GetCommandListTypeMask(UINT Subresource) const noexcept { if (IsExclusiveState(Subresource)) { return (1 << (UINT)GetExclusiveSubresourceState(Subresource).CommandListType); } else { auto& SharedState = GetSharedSubresourceState(Subresource); UINT TypeMask = 0; for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; ++i) { if (SharedState.FenceValues[i] > 0) { TypeMask |= (1 << i); } } return TypeMask; } } //---------------------------------------------------------------------------------------------------------------------------------- void CCurrentResourceState::Reset() noexcept { m_bAllSubresourcesSame = true; m_spExclusiveState[0] = ExclusiveState{}; if (!m_pSharedState.empty()) { m_pSharedState[0] = SharedState{}; } } //---------------------------------------------------------------------------------------------------------------------------------- ResourceStateManagerBase::ResourceStateManagerBase() noexcept(false) { D3D12TranslationLayer::InitializeListHead(&m_TransitionListHead); // Reserve some space in these vectors upfront. Values are arbitrary. 
for (auto& srcVec : m_vSrcResourceBarriers) { srcVec.reserve(50); } m_vDstResourceBarriers.reserve(50); m_vTentativeResourceBarriers.reserve(20); m_vPostApplyUpdates.reserve(50); m_SwapchainDeferredWaits.reserve(5); m_ResourceDeferredWaits.reserve(5); } //---------------------------------------------------------------------------------------------------------------------------------- bool ShouldIgnoreTransitionRequest(CDesiredResourceState::SubresourceInfo const& CurrentState, CDesiredResourceState::SubresourceInfo const& NewState) { // If we're not supposed to transition for bindings, and we have an incoming request to transition to a binding state, ignore it. return (CurrentState.Flags & SubresourceTransitionFlags::NoBindingTransitions) != SubresourceTransitionFlags::None && (NewState.Flags & SubresourceTransitionFlags::TransitionPreDraw) != SubresourceTransitionFlags::None; } //---------------------------------------------------------------------------------------------------------------------------------- void ResourceStateManagerBase::TransitionResource(TransitionableResourceBase& Resource, CDesiredResourceState::SubresourceInfo const& State) noexcept { if (ShouldIgnoreTransitionRequest(Resource.m_DesiredState.GetSubresourceInfo(0), State)) { return; } Resource.m_DesiredState.SetResourceState(State); if (!Resource.IsTransitionPending()) { InsertHeadList(&m_TransitionListHead, &Resource.m_TransitionListEntry); } } //---------------------------------------------------------------------------------------------------------------------------------- void ResourceStateManagerBase::TransitionSubresources(TransitionableResourceBase& Resource, CViewSubresourceSubset const& Subresources, CDesiredResourceState::SubresourceInfo const& State) noexcept { if (Subresources.IsWholeResource()) { if (ShouldIgnoreTransitionRequest(Resource.m_DesiredState.GetSubresourceInfo(0), State)) { return; } Resource.m_DesiredState.SetResourceState(State); } else { for (auto&& range : 
Subresources) { for (UINT i = range.first; i < range.second; ++i) { if (ShouldIgnoreTransitionRequest(Resource.m_DesiredState.GetSubresourceInfo(i), State)) { continue; } Resource.m_DesiredState.SetSubresourceState(i, State); } } } if (!Resource.IsTransitionPending()) { InsertHeadList(&m_TransitionListHead, &Resource.m_TransitionListEntry); } } //---------------------------------------------------------------------------------------------------------------------------------- void ResourceStateManagerBase::TransitionSubresource(TransitionableResourceBase& Resource, UINT SubresourceIndex, CDesiredResourceState::SubresourceInfo const& State) noexcept { Resource.m_DesiredState.SetSubresourceState(SubresourceIndex, State); if (!Resource.IsTransitionPending()) { InsertHeadList(&m_TransitionListHead, &Resource.m_TransitionListEntry); } } //---------------------------------------------------------------------------------------------------------------------------------- void ResourceStateManagerBase::AddDeferredWait(std::shared_ptr const& spFence, UINT64 Value) noexcept(false) { m_SwapchainDeferredWaits.emplace_back(DeferredWait{ spFence, Value }); // throw( bad_alloc ) } //---------------------------------------------------------------------------------------------------------------------------------- void ResourceStateManagerBase::ApplyResourceTransitionsPreamble() noexcept { for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; i++) { m_vSrcResourceBarriers[i].clear(); } m_vDstResourceBarriers.clear(); m_vTentativeResourceBarriers.clear(); m_vPostApplyUpdates.clear(); m_DestinationCommandListType = COMMAND_LIST_TYPE::UNKNOWN; m_bApplySwapchainDeferredWaits = false; for (auto& bFlush : m_bFlushQueues) { bFlush = false; } for (auto& FenceValue : m_QueueFenceValuesToWaitOn) { FenceValue = 0; } } //---------------------------------------------------------------------------------------------------------------------------------- /*static*/ bool 
ResourceStateManagerBase::TransitionRequired(D3D12_RESOURCE_STATES CurrentState, D3D12_RESOURCE_STATES& DestinationState, SubresourceTransitionFlags Flags) noexcept
{
    // Decides whether moving from CurrentState to DestinationState needs a barrier.
    // DestinationState is in/out:
    //  - left as-is on an exact match, or when StateMatchExact forces the transition;
    //  - set to CurrentState when CurrentState already contains every requested bit (no barrier);
    //  - OR-ed with CurrentState when neither state is a write state (barrier to the combined read state).

    // An exact match never needs a transition.
    if (CurrentState == DestinationState)
    {
        return false;
    }

    // Not an exact match, but an exact match required, so do the transition.
    if ((Flags & SubresourceTransitionFlags::StateMatchExact) != SubresourceTransitionFlags::None)
    {
        return true;
    }

    // Current state already contains the destination state, we're good.
    if ((CurrentState & DestinationState) == DestinationState)
    {
        DestinationState = CurrentState;
        return false;
    }

    // If the transition involves a write state, then the destination should just be the requested destination.
    // Otherwise, accumulate read states to minimize future transitions (by triggering the above condition).
    if (!IsD3D12WriteState(DestinationState, SubresourceTransitionFlags::None) &&
        !IsD3D12WriteState(CurrentState, SubresourceTransitionFlags::None))
    {
        DestinationState |= CurrentState;
    }
    return true;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Appends a PostApplyUpdate record built from the arguments to m_vPostApplyUpdates. May throw bad_alloc.
void ResourceStateManagerBase::AddCurrentStateUpdate(TransitionableResourceBase& Resource, CCurrentResourceState& CurrentState, UINT SubresourceIndex, D3D12_RESOURCE_STATES NewState, PostApplyExclusiveState ExclusiveState, bool IsGoingToDestinationType) noexcept(false)
{
    PostApplyUpdate Update = { Resource, CurrentState, SubresourceIndex, NewState, ExclusiveState, IsGoingToDestinationType };
    m_vPostApplyUpdates.push_back(Update); // throw( bad_alloc )
}

//----------------------------------------------------------------------------------------------------------------------------------
// Processes the desired state of one resource from the pending-transition list and queues the
// barriers / queue synchronization it requires into this object's working vectors and flags.
//   pTransitioningResource - D3D12 resource written into every barrier description.
//   TransitionableResource - supplies the desired state, deferred waits and swapchain-wait flag.
//   CurrentState           - current tracked state of the resource.
//   BindingState           - source of the state implied by the resource's current bindings.
//   NumTotalSubresources   - number of subresources iterated when not processing the resource as a whole.
//   CurrentFenceValues     - one fence value per command list type (MAX_VALID entries).
//   bIsPreDraw             - when false, subresources flagged TransitionPreDraw are skipped and kept pending.
// Returns TransitionResult::Remove when the resource is fully processed, or TransitionResult::Keep
// when it must stay in the transition list. The desired state is rewritten before returning.
// May throw bad_alloc.
auto ResourceStateManagerBase::ProcessTransitioningResource(ID3D12Resource* pTransitioningResource,
                                                        TransitionableResourceBase& TransitionableResource,
                                                        CCurrentResourceState& CurrentState,
                                                        CResourceBindings& BindingState,
                                                        UINT NumTotalSubresources,
                                                        _In_reads_((UINT)COMMAND_LIST_TYPE::MAX_VALID) const UINT64* CurrentFenceValues,
                                                        bool bIsPreDraw) noexcept(false) -> TransitionResult
{
    // By default, assume that the resource is fully processed by this routine.
    TransitionResult result = TransitionResult::Remove;

    // Figure out the set of subresources that are transitioning
    auto& DestinationState = TransitionableResource.m_DesiredState;
    bool bAllSubresourcesAtOnce = CurrentState.AreAllSubresourcesSame() && DestinationState.AreAllSubresourcesSame();
    bool bNeedsTransitionToBindState = false;

    // Barrier template shared by every subresource; Subresource and the before/after states are filled in per iteration.
    D3D12_RESOURCE_BARRIER TransitionDesc;
    ZeroMemory(&TransitionDesc, sizeof(TransitionDesc));
    TransitionDesc.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
    TransitionDesc.Transition.pResource = pTransitioningResource;

    UINT numSubresources = bAllSubresourcesAtOnce ? 1 : NumTotalSubresources;
    for (UINT i = 0; i < numSubresources; ++i)
    {
        CDesiredResourceState::SubresourceInfo SubresourceDestinationInfo = DestinationState.GetSubresourceInfo(i);
        TransitionDesc.Transition.Subresource = bAllSubresourcesAtOnce ? D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES : i;

        // Is this subresource relevant for the current transition?
        if ((SubresourceDestinationInfo.Flags & SubresourceTransitionFlags::TransitionPreDraw) != SubresourceTransitionFlags::None &&
            !bIsPreDraw)
        {
            // Nope, we'll go to the next subresource, and also indicate to leave this resource in the transition list so that
            // we come back to it on the next draw operation.
            result = TransitionResult::Keep;
            continue;
        }

        // Is this subresource currently being used, or is it just being iterated over?
        D3D12_RESOURCE_STATES after = SubresourceDestinationInfo.State;
        COMMAND_LIST_TYPE curCmdListType = SubresourceDestinationInfo.CommandListType;
        SubresourceTransitionFlags Flags = SubresourceDestinationInfo.Flags;
        if (after == UNKNOWN_RESOURCE_STATE ||
            (curCmdListType == COMMAND_LIST_TYPE::UNKNOWN && (Flags & SubresourceTransitionFlags::StateMatchExact) == SubresourceTransitionFlags::None))
        {
            // This subresource doesn't have any transition requested - move on to the next.
            continue;
        }

#if DBG
        // NOTE(review): std::vector appears here without a template argument list - confirm the element type.
        std::vector* pBarrierVectors[2 + _countof(m_vSrcResourceBarriers)] = { &m_vTentativeResourceBarriers, &m_vDstResourceBarriers };
        std::transform(m_vSrcResourceBarriers, std::end(m_vSrcResourceBarriers), &pBarrierVectors[2], [](auto& vec) { return &vec; });
        // This subresource should not already be in any transition list
        for (auto pVec : pBarrierVectors)
        {
            for (auto& desc : *pVec)
            {
                assert(!(desc.Transition.pResource == pTransitioningResource &&
                         desc.Transition.Subresource == TransitionDesc.Transition.Subresource));
            }
        }
#endif

        // COMMAND_LIST_TYPE::UNKNOWN is only supported as a destination when transitioning to COMMON.
        // Additionally, all destination command list types processed in a single ApplyAll should
        // be going to the same command list type (if any).
        assert(m_DestinationCommandListType == COMMAND_LIST_TYPE::UNKNOWN ||
               (curCmdListType == COMMAND_LIST_TYPE::UNKNOWN && after == D3D12_RESOURCE_STATE_COMMON) ||
               m_DestinationCommandListType == curCmdListType);
        if (curCmdListType != COMMAND_LIST_TYPE::UNKNOWN)
        {
            // We will synchronize so that work submitted to this command list type
            // occurs after all in-flight references to transitioning resources.
            m_DestinationCommandListType = curCmdListType;
        }
        else
        {
            // We will transition this resource to COMMON on whatever command queue
            // it is currently exclusive on.
            curCmdListType = CurrentState.GetExclusiveSubresourceState(i).CommandListType;
        }

        // Check if we should leave this resource in the list and update its destination to match its bindings.
        if ((Flags & SubresourceTransitionFlags::NoBindingTransitions) == SubresourceTransitionFlags::None && !bIsPreDraw)
        {
            D3D12_RESOURCE_STATES stateFromBindings = BindingState.GetD3D12ResourceUsageFromBindings(i);
            if (stateFromBindings != UNKNOWN_RESOURCE_STATE && after != stateFromBindings)
            {
                bNeedsTransitionToBindState = true;
            }
        }
        // The NoBindingTransition flag should be set on the whole resource or not at all.
        assert((Flags & SubresourceTransitionFlags::NoBindingTransitions) == SubresourceTransitionFlags::None || !bNeedsTransitionToBindState);

        if (IsD3D12WriteState(after, Flags) && TransitionableResource.m_bTriggersSwapchainDeferredWaits)
        {
            m_bApplySwapchainDeferredWaits = true;
        }
        if (!TransitionableResource.m_ResourceDeferredWaits.empty())
        {
            // Move this resource's deferred waits to the front of the manager-wide list and clear them on the resource.
            m_ResourceDeferredWaits.insert(m_ResourceDeferredWaits.begin(),
                                           TransitionableResource.m_ResourceDeferredWaits.begin(),
                                           TransitionableResource.m_ResourceDeferredWaits.end()); // throw( bad_alloc )
            TransitionableResource.m_ResourceDeferredWaits.clear();
        }

        if (CurrentState.IsExclusiveState(i))
        {
            ProcessTransitioningSubresourceExclusive(
                CurrentState,
                i,
                curCmdListType,
                CurrentFenceValues,
                SubresourceDestinationInfo,
                after,
                TransitionableResource,
                TransitionDesc,
                Flags); // throw( bad_alloc )
        }
        else
        {
            ProcessTransitioningSubresourceShared(
                CurrentState,
                i,
                after,
                Flags,
                CurrentFenceValues,
                curCmdListType,
                TransitionDesc,
                TransitionableResource); // throw( bad_alloc )
        }
    }

    // Sentinel "nothing requested" desired state used when clearing processed requests below.
    CDesiredResourceState::SubresourceInfo UnknownDestinationState = {};
    UnknownDestinationState.CommandListType = COMMAND_LIST_TYPE::UNKNOWN;
    UnknownDestinationState.State = UNKNOWN_RESOURCE_STATE;
    UnknownDestinationState.Flags = SubresourceTransitionFlags::None;

    // Update destination states.
    if (bNeedsTransitionToBindState)
    {
        // Re-target the desired state at what the bindings require and keep the resource pending
        // so that it is processed again pre-draw.
        result = TransitionResult::Keep;
        bAllSubresourcesAtOnce = BindingState.AreAllSubresourcesTheSame();
        numSubresources = bAllSubresourcesAtOnce ? 1 : NumTotalSubresources;
        for (UINT i = 0; i < numSubresources; ++i)
        {
            CDesiredResourceState::SubresourceInfo BindingDestinationState = {};
            BindingDestinationState.CommandListType = COMMAND_LIST_TYPE::GRAPHICS;
            BindingDestinationState.State = BindingState.GetD3D12ResourceUsageFromBindings(i);
            BindingDestinationState.Flags = SubresourceTransitionFlags::TransitionPreDraw;
            if (bAllSubresourcesAtOnce)
            {
                DestinationState.SetResourceState(BindingDestinationState);
            }
            else
            {
                DestinationState.SetSubresourceState(i, BindingDestinationState);
            }
        }
    }
    else if (result == TransitionResult::Remove) // We're done
    {
        // Coalesce destination state to ensure that it's set for the entire resource.
        DestinationState.SetResourceState(UnknownDestinationState);
    }
    else if (!bIsPreDraw)
    {
        // There must be some subresource which was pending a draw transition, but not one that transitioned this time.
        // Make sure all *other* subresources have their pending transitions cleared.
        assert(!DestinationState.AreAllSubresourcesSame() ||
               (DestinationState.GetSubresourceInfo(0).Flags & SubresourceTransitionFlags::TransitionPreDraw) != SubresourceTransitionFlags::None);
        bAllSubresourcesAtOnce = DestinationState.AreAllSubresourcesSame();
        numSubresources = bAllSubresourcesAtOnce ? 1 : NumTotalSubresources;
        for (UINT i = 0; i < numSubresources; ++i)
        {
            if ((DestinationState.GetSubresourceInfo(i).Flags & SubresourceTransitionFlags::TransitionPreDraw) == SubresourceTransitionFlags::None)
            {
                if (bAllSubresourcesAtOnce)
                {
                    DestinationState.SetResourceState(UnknownDestinationState);
                }
                else
                {
                    DestinationState.SetSubresourceState(i, UnknownDestinationState);
                }
            }
        }
    }

    return result;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Handles one subresource (or the whole resource) whose current state is exclusive.
// Queues barriers into the source / destination / tentative vectors, marks queues to flush or fence
// values to wait on, and (when required) records a post-apply state update. `after` is passed by value
// and may be widened by TransitionRequired before being recorded. May throw bad_alloc.
void ResourceStateManagerBase::ProcessTransitioningSubresourceExclusive(
    CCurrentResourceState& CurrentState,
    UINT i,
    COMMAND_LIST_TYPE curCmdListType,
    _In_reads_((UINT)COMMAND_LIST_TYPE::MAX_VALID) const UINT64* CurrentFenceValues,
    CDesiredResourceState::SubresourceInfo& SubresourceDestinationInfo,
    D3D12_RESOURCE_STATES after,
    TransitionableResourceBase& TransitionableResource,
    D3D12_RESOURCE_BARRIER& TransitionDesc,
    SubresourceTransitionFlags Flags) noexcept(false)
{
    // If the subresource is currently exclusively owned by a queue, there's one of several outcomes:
    // 1. A transition barrier on the queue which owns it into a different state.
    //    For simultaneous access resources, this only happens if the new usage is in the same command list as the previous.
    //    Otherwise, the resource switches to shared ownership.
    // 2. Non-simultaneous access only: Changing queue ownership. This results in a barrier to common on the source (if not already COMMON),
    //    and a transition to the destination state on the destination queue (if not COMMON), after a flush on each and sync.
    //    If no barrier was submitted to the source, then the dest will wait on the fence which was stored when the subresource transitioned to COMMON.
    // 3. Simultaneous access only: Changing queue ownership. This results in flushing the source (if used in the current command list)
    //    and dest (if not empty) with sync.
    //    The subresource remains exclusively owned by the new queue IFF the new state is a write state, otherwise it becomes shared.
    // 4. Simultaneous access only: Not changing queue ownership, and not used in the current command list.
    //    The subresource simply transitions from exclusive to shared state if the new state is a read state.
    CCurrentResourceState::ExclusiveState CurrentExclusiveState = CurrentState.GetExclusiveSubresourceState(i);
    PostApplyExclusiveState PostExclusiveState = PostApplyExclusiveState::Exclusive;

    // Ensures the previous owner's work is synchronized with: flush its queue if the subresource was
    // last used at that queue's current fence value, otherwise wait on the recorded fence value.
    auto FlushOrUpdateFenceValue = [&]()
    {
        if (CurrentExclusiveState.CommandListType == COMMAND_LIST_TYPE::UNKNOWN)
        {
            // The only time we should have an exclusive state with UNKNOWN command list type
            // is for resources that haven't been used yet.
            return;
        }
        if (CurrentFenceValues[(UINT)CurrentExclusiveState.CommandListType] == CurrentExclusiveState.FenceValue)
        {
            m_bFlushQueues[(UINT)CurrentExclusiveState.CommandListType] = true;
        }
        else
        {
            m_QueueFenceValuesToWaitOn[(UINT)CurrentExclusiveState.CommandListType] =
                std::max(m_QueueFenceValuesToWaitOn[(UINT)CurrentExclusiveState.CommandListType], CurrentExclusiveState.FenceValue);
        }
    };

    // By default a state update is queued unless the request says the resource is not used in the
    // command list when no state change occurs; the branches below force it on when a barrier is added.
    bool bQueueStateUpdate = (SubresourceDestinationInfo.Flags & SubresourceTransitionFlags::NotUsedInCommandListIfNoStateChange) == SubresourceTransitionFlags::None;

    if (curCmdListType == CurrentExclusiveState.CommandListType &&
        (!CurrentState.SupportsSimultaneousAccess() || CurrentFenceValues[(UINT)curCmdListType] == CurrentExclusiveState.FenceValue))
    {
        if (TransitionRequired(CurrentExclusiveState.State, /*inout*/ after, SubresourceDestinationInfo.Flags))
        {
            // Case 1: Insert a single concrete barrier.
            // Note: For simultaneous access resources, barriers go into a tentative list
            // because further barrier processing may cause us to flush this command queue,
            // making all simultaneous access resource states decay and then implicitly promote.
            // For resources which trigger deferred waits, they go into a vector which is only safe to submit
            // after a flush.
            // All other resources go into a vector which is safe to submit before a flush.
            auto& TransitionVector = CurrentState.SupportsSimultaneousAccess() ? m_vTentativeResourceBarriers :
                (TransitionableResource.m_bTriggersSwapchainDeferredWaits ? m_vDstResourceBarriers : m_vSrcResourceBarriers[(UINT)curCmdListType]);
            TransitionDesc.Transition.StateBefore = D3D12_RESOURCE_STATES(CurrentExclusiveState.State);
            TransitionDesc.Transition.StateAfter = D3D12_RESOURCE_STATES(after);
            assert(TransitionDesc.Transition.StateBefore != TransitionDesc.Transition.StateAfter);
            TransitionVector.push_back(TransitionDesc); // throw( bad_alloc )

            PostExclusiveState = (CurrentState.SupportsSimultaneousAccess() && !IsD3D12WriteState(after, Flags)) ?
                PostApplyExclusiveState::SharedIfFlushed : PostApplyExclusiveState::Exclusive;
            bQueueStateUpdate = true;
        }
        // Regardless of whether a transition was inserted, we'll update the current state
        // of this subresource, to at least update its fence value.
        // Unless it was transitioning to COMMON for CPU access and was already in COMMON.
    }
    else if (curCmdListType != CurrentExclusiveState.CommandListType)
    {
        if (!CurrentState.SupportsSimultaneousAccess())
        {
            // Case 2: Changing queue ownership with concrete barriers.
            if (CurrentExclusiveState.State == D3D12_RESOURCE_STATE_COMMON)
            {
                FlushOrUpdateFenceValue();
            }
            else
            {
                TransitionDesc.Transition.StateBefore = D3D12_RESOURCE_STATES(CurrentExclusiveState.State);
                TransitionDesc.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON;
                m_vSrcResourceBarriers[(UINT)CurrentExclusiveState.CommandListType].push_back(TransitionDesc); // throw( bad_alloc )
            }
            if (after != D3D12_RESOURCE_STATE_COMMON)
            {
                // TODO: Don't do this for SRV or copy src/dest.
                TransitionDesc.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON;
                TransitionDesc.Transition.StateAfter = D3D12_RESOURCE_STATES(after);
                m_vDstResourceBarriers.push_back(TransitionDesc); // throw( bad_alloc )
                bQueueStateUpdate = true;
            }
        }
        else
        {
            // Case 3: Changing queue ownership via promotion/decay.
            FlushOrUpdateFenceValue();
            PostExclusiveState = IsD3D12WriteState(after, Flags) ?
                PostApplyExclusiveState::Exclusive : PostApplyExclusiveState::Shared;
        }
    }
    else if (CurrentState.SupportsSimultaneousAccess())
    {
        // Case 4: same queue, but last used in an earlier command list (fence values differ).
        assert(CurrentFenceValues[(UINT)curCmdListType] != CurrentExclusiveState.FenceValue);
        PostExclusiveState = IsD3D12WriteState(after, Flags) ?
            PostApplyExclusiveState::Exclusive : PostApplyExclusiveState::Shared;
    }

    if (bQueueStateUpdate)
    {
        AddCurrentStateUpdate(TransitionableResource,
                              CurrentState,
                              TransitionDesc.Transition.Subresource,
                              after,
                              PostExclusiveState,
                              SubresourceDestinationInfo.CommandListType != COMMAND_LIST_TYPE::UNKNOWN); // throw( bad_alloc )
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Handles one subresource (or the whole resource) whose current state is shared, which the assert
// below requires to be a simultaneous-access resource. Queues tentative barriers, marks queues to
// flush or fence values to wait on, and (when required) records a post-apply state update.
// May throw bad_alloc.
void ResourceStateManagerBase::ProcessTransitioningSubresourceShared(
    CCurrentResourceState& CurrentState,
    UINT i,
    D3D12_RESOURCE_STATES after,
    SubresourceTransitionFlags Flags,
    _In_reads_((UINT)COMMAND_LIST_TYPE::MAX_VALID) const UINT64* CurrentFenceValues,
    COMMAND_LIST_TYPE curCmdListType,
    D3D12_RESOURCE_BARRIER& TransitionDesc,
    TransitionableResourceBase& TransitionableResource) noexcept(false)
{
    bool bQueueStateUpdate = (Flags & SubresourceTransitionFlags::NotUsedInCommandListIfNoStateChange) == SubresourceTransitionFlags::None;

    // If the subresource is not currently exclusively owned by a queue, then it is a simultaneous access resource.
    // There are several possible outcomes here as well:
    // 1. The resource is being written to, and transitions into being exclusively owned by a queue.
    //    Any previous shared owners are flushed, along with the new exclusive owner if there were other previous owners.
    //    If the only previous owner was the same as the new owner, and in the same command list, a concrete barrier is inserted.
    // 2. The resource is not being written to. The shared state is simply updated. No barriers are issued.
    assert(CurrentState.SupportsSimultaneousAccess());
    auto& SharedState = CurrentState.GetSharedSubresourceState(i);
    PostApplyExclusiveState PostExclusiveState = PostApplyExclusiveState::Shared;

    if (IsD3D12WriteState(after, Flags))
    {
        PostExclusiveState = PostApplyExclusiveState::Exclusive;
        for (UINT CommandListType = 0; CommandListType < (UINT)COMMAND_LIST_TYPE::MAX_VALID; ++CommandListType)
        {
            assert(!IsD3D12WriteState(SharedState.State[CommandListType], SubresourceTransitionFlags::None));
            if (SharedState.FenceValues[CommandListType] == CurrentFenceValues[CommandListType])
            {
                // This queue's current (unsubmitted) command list references the subresource.
                if (CommandListType == (UINT)curCmdListType)
                {
                    // Only prep a barrier if it's a real write state.
                    if (IsD3D12WriteState(after, SubresourceTransitionFlags::None))
                    {
                        TransitionDesc.Transition.StateBefore = D3D12_RESOURCE_STATES(SharedState.State[CommandListType]);
                        TransitionDesc.Transition.StateAfter = D3D12_RESOURCE_STATES(after);
                        assert(TransitionDesc.Transition.StateBefore != TransitionDesc.Transition.StateAfter);
                        m_vTentativeResourceBarriers.push_back(TransitionDesc); // throw( bad_alloc )
                        bQueueStateUpdate = true;
                    }
                }
                else
                {
                    m_bFlushQueues[CommandListType] = true;
                }
            }
            else if (CommandListType != (UINT)m_DestinationCommandListType)
            {
                // Already-submitted work on another queue: wait on the highest recorded fence value.
                m_QueueFenceValuesToWaitOn[CommandListType] =
                    std::max(m_QueueFenceValuesToWaitOn[CommandListType], SharedState.FenceValues[CommandListType]);
            }
        }
    }
    else
    {
        // Read-only request: accumulate with the read state already recorded for the destination type.
        after |= SharedState.State[(UINT)m_DestinationCommandListType];
        assert(!IsD3D12WriteState(after, SubresourceTransitionFlags::None));

        // Still wait on the last exclusive owner when it was a different, known queue.
        auto& ExclusiveState = CurrentState.GetExclusiveSubresourceState(i);
        if (ExclusiveState.CommandListType != COMMAND_LIST_TYPE::UNKNOWN &&
            ExclusiveState.CommandListType != m_DestinationCommandListType)
        {
            m_QueueFenceValuesToWaitOn[(UINT)ExclusiveState.CommandListType] =
                std::max(m_QueueFenceValuesToWaitOn[(UINT)ExclusiveState.CommandListType], ExclusiveState.FenceValue);
        }
    }

    if (bQueueStateUpdate)
    {
        AddCurrentStateUpdate(TransitionableResource,
                              CurrentState,
                              TransitionDesc.Transition.Subresource,
                              after,
                              PostExclusiveState,
                              curCmdListType != COMMAND_LIST_TYPE::UNKNOWN); // throw( bad_alloc )
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Records Count barriers on the command list that matches pManager's command list type, then notifies
// the manager via AdditionalCommandsAdded(). Unknown types hit assert(false) and record nothing.
void ResourceStateManagerBase::SubmitResourceBarriers(_In_reads_(Count) D3D12_RESOURCE_BARRIER const * pBarriers, UINT Count, _In_ CommandListManager * pManager) noexcept
{
    switch (pManager->GetCommandListType())
    {
        case COMMAND_LIST_TYPE::GRAPHICS:
            pManager->GetGraphicsCommandList()->ResourceBarrier(Count, pBarriers);
            break;

        case COMMAND_LIST_TYPE::VIDEO_DECODE:
            pManager->GetVideoDecodeCommandList()->ResourceBarrier(Count, pBarriers);
            break;

        case COMMAND_LIST_TYPE::VIDEO_PROCESS:
            pManager->GetVideoProcessCommandList()->ResourceBarrier(Count, pBarriers);
            break;

        default:
            // Compile-time reminder to extend this switch when a command list type is added.
            static_assert((UINT)COMMAND_LIST_TYPE::MAX_VALID == 3, "Need to update switch/case.");
            assert(false);
            break;
    }
    pManager->AdditionalCommandsAdded();
}

//----------------------------------------------------------------------------------------------------------------------------------
// Drives submission of everything queued by the Process* routines, using caller-supplied callables so
// that the same sequencing can be run for real (SubmitResourceTransitions) or simulated
// (SimulateSubmitResourceTransitions):
//   SubmitBarriersImpl(vector, type)     - submit a barrier vector on a command list type.
//   SubmitCmdListImpl(type)              - submit/flush the command list of that type.
//   HasCommandsImpl(type)                - whether the destination is worth flushing.
//   GetCurrentFenceImpl(type)            - fence value that the about-to-be-submitted work will signal.
//   InsertDeferredWaitsImpl(type)        - insert pending deferred waits on the destination queue.
//   InsertQueueWaitImpl(value, src, dst) - make dst wait for src to reach value.
template <
    typename TSubmitBarriersImpl,
    typename TSubmitCmdListImpl,
    typename THasCommandsImpl,
    typename TGetCurrentFenceImpl,
    typename TInsertDeferredWaitsImpl,
    typename TInsertQueueWaitImpl
> void ResourceStateManagerBase::SubmitResourceTransitionsImpl(
    TSubmitBarriersImpl&& SubmitBarriersImpl,
    TSubmitCmdListImpl&& SubmitCmdListImpl,
    THasCommandsImpl&& HasCommandsImpl,
    TGetCurrentFenceImpl&& GetCurrentFenceImpl,
    TInsertDeferredWaitsImpl&& InsertDeferredWaitsImpl,
    TInsertQueueWaitImpl&& InsertQueueWaitImpl)
{
    // Step 1: Submit any pending barriers on source command lists that are not the destination.
    for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; ++i)
    {
        if (!m_vSrcResourceBarriers[i].empty())
        {
            if (i == (UINT)m_DestinationCommandListType)
            {
                // We'll get to it later.
                continue;
            }

            // Submitting any barriers on a command list indicates it needs to be submitted before we're done.
            m_bFlushQueues[i] = true;
            SubmitBarriersImpl(m_vSrcResourceBarriers[i], (COMMAND_LIST_TYPE)i);
        }
    }

    if (m_DestinationCommandListType == COMMAND_LIST_TYPE::UNKNOWN)
    {
        // We've done all the necessary work.
        return;
    }

    // Step 2: Flush any necessary source command lists.
    for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; ++i)
    {
        if (m_bFlushQueues[i])
        {
            assert(i != (UINT)m_DestinationCommandListType);

            // Submitting a command list indicates it needs to be waited on before we're done.
            m_QueueFenceValuesToWaitOn[i] = GetCurrentFenceImpl((COMMAND_LIST_TYPE)i);
            SubmitCmdListImpl((COMMAND_LIST_TYPE)i);
        }
    }

    // Step 3: Flush the destination command list type if necessary
    // A flush is wanted when deferred waits are pending, or when some queue wait has not been inserted yet.
    bool bFlushDestination = (m_bApplySwapchainDeferredWaits && !m_SwapchainDeferredWaits.empty()) ||
        !m_ResourceDeferredWaits.empty();
    if (!bFlushDestination)
    {
        for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; ++i)
        {
            if (m_InsertedQueueSync[(UINT)m_DestinationCommandListType][i] < m_QueueFenceValuesToWaitOn[i])
            {
                bFlushDestination = true;
                break;
            }
        }
    }
    auto& SrcVec = m_vSrcResourceBarriers[(UINT)m_DestinationCommandListType];
    if (bFlushDestination && HasCommandsImpl(m_DestinationCommandListType))
    {
        // Submit any barriers that should go before the flush, if we have any.
        if (!SrcVec.empty())
        {
            // TODO: Consider converting these into split barriers and putting ends in m_vDstResourceBarriers.
            SubmitBarriersImpl(SrcVec, m_DestinationCommandListType);
        }

        SubmitCmdListImpl(m_DestinationCommandListType);
    }
    else
    {
        // If we didn't flush, then these barriers on simultaneous access
        // resources need to be done concretely.
        m_vDstResourceBarriers.insert(m_vDstResourceBarriers.end(), m_vTentativeResourceBarriers.begin(), m_vTentativeResourceBarriers.end()); // throw( bad_alloc )

        // And we'll add in the other concrete barriers that we haven't done yet.
        m_vDstResourceBarriers.insert(m_vDstResourceBarriers.end(), SrcVec.begin(), SrcVec.end()); // throw( bad_alloc )
    }

    // Step 4: Insert sync
    InsertDeferredWaitsImpl(m_DestinationCommandListType);
    for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; ++i)
    {
        if (m_InsertedQueueSync[(UINT)m_DestinationCommandListType][i] < m_QueueFenceValuesToWaitOn[i])
        {
            InsertQueueWaitImpl(m_QueueFenceValuesToWaitOn[i], (COMMAND_LIST_TYPE)i, m_DestinationCommandListType);
            // Remember the highest value already waited on so the same wait is not inserted again.
            m_InsertedQueueSync[(UINT)m_DestinationCommandListType][i] = m_QueueFenceValuesToWaitOn[i];
        }
    }

    // Step 5: Insert destination barriers
    if (!m_vDstResourceBarriers.empty())
    {
        SubmitBarriersImpl(m_vDstResourceBarriers, m_DestinationCommandListType);
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Runs SubmitResourceTransitionsImpl against real command list managers.
//   ppManagers - array of MAX_VALID manager pointers, indexed by command list type.
// May throw (PrepForCommandQueueSync and the vector inserts inside the Impl).
void ResourceStateManagerBase::SubmitResourceTransitions(_In_reads_((UINT)COMMAND_LIST_TYPE::MAX_VALID) CommandListManager ** ppManagers) noexcept(false)
{
    // NOTE(review): std::vector appears here without a template argument list - confirm the element type.
    auto SubmitBarriersImpl = [this, ppManagers](std::vector& barriers, COMMAND_LIST_TYPE type)
    {
        this->SubmitResourceBarriers(barriers.data(), (UINT)barriers.size(), ppManagers[(UINT)type]);
    };
    auto SubmitCmdListImpl = [this, ppManagers](COMMAND_LIST_TYPE type)
    {
        // Note: Command list may not have pending commands, but queue may have commands which need to have a signal inserted.
        ppManagers[(UINT)type]->PrepForCommandQueueSync(); // throws
    };
    auto HasCommandsImpl = [this, ppManagers](COMMAND_LIST_TYPE type)
    {
        return ppManagers[(UINT)type]->ShouldFlushForResourceAcquire();
    };
    auto GetCurrentFenceImpl = [this, ppManagers](COMMAND_LIST_TYPE type)
    {
        return ppManagers[(UINT)type]->GetCommandListID();
    };
    // Inserts GPU-side waits for every pending deferred wait on the destination queue, marks each fence
    // as used by the destination's current command list, and clears the processed wait lists.
    // Swapchain waits are only applied (and cleared) when m_bApplySwapchainDeferredWaits is set.
    auto InsertDeferredWaitsImpl = [this, ppManagers](COMMAND_LIST_TYPE type)
    {
        auto pDestinationManager = ppManagers[(UINT)type];
        if (m_bApplySwapchainDeferredWaits)
        {
            for (auto& Wait : m_SwapchainDeferredWaits)
            {
                pDestinationManager->GetCommandQueue()->Wait(Wait.fence->Get(), Wait.value);
                Wait.fence->UsedInCommandList(pDestinationManager->GetCommandListType(), pDestinationManager->GetCommandListID());
            }
            m_SwapchainDeferredWaits.clear();
        }
        for (auto& Wait : m_ResourceDeferredWaits)
        {
            pDestinationManager->GetCommandQueue()->Wait(Wait.fence->Get(), Wait.value);
            Wait.fence->UsedInCommandList(pDestinationManager->GetCommandListType(), pDestinationManager->GetCommandListID());
        }
        m_ResourceDeferredWaits.clear();
    };
    // Makes the dst queue wait for the src queue's fence to reach value. Skipped for a queue waiting
    // on itself, or when the source fence has already completed that value.
    auto InsertQueueWaitImpl = [this, ppManagers](UINT64 value, COMMAND_LIST_TYPE src, COMMAND_LIST_TYPE dst)
    {
        if(src == dst || value <= ppManagers[(UINT)src]->GetFence()->GetCompletedValue())
        {
            return;
        }
        ppManagers[(UINT)dst]->GetCommandQueue()->Wait(ppManagers[(UINT)src]->GetFence()->Get(), value);
    };
    SubmitResourceTransitionsImpl(SubmitBarriersImpl, SubmitCmdListImpl, HasCommandsImpl, GetCurrentFenceImpl, InsertDeferredWaitsImpl, InsertQueueWaitImpl);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Runs the same sequencing as SubmitResourceTransitions, but with caller-supplied callbacks in place
// of real command list managers.
// NOTE(review): the std::function parameter types appear here with their template arguments missing or
// truncated - confirm the signatures against the class declaration.
void ResourceStateManagerBase::SimulateSubmitResourceTransitions(
    std::function&, COMMAND_LIST_TYPE)> SubmitBarriers,
    std::function SubmitCmdList,
    std::function HasCommands,
    std::function GetCurrentFenceImpl,
    std::function InsertDeferredWaits,
    std::function InsertQueueWait)
{
    SubmitResourceTransitionsImpl(SubmitBarriers, SubmitCmdList, HasCommands, GetCurrentFenceImpl, InsertDeferredWaits, InsertQueueWait);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Packs (State, DestinationCommandListType, Flags) into a SubresourceInfo and forwards to the
// base-class whole-resource request.
void ResourceStateManager::TransitionResource(Resource* pResource, D3D12_RESOURCE_STATES State, COMMAND_LIST_TYPE DestinationCommandListType, SubresourceTransitionFlags Flags) noexcept
{
    CDesiredResourceState::SubresourceInfo DesiredState = { State, DestinationCommandListType, Flags };
    ResourceStateManagerBase::TransitionResource(*pResource, DesiredState);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Packs (State, DestinationCommandListType, Flags) into a SubresourceInfo and forwards to the
// base-class subresource-subset request.
void ResourceStateManager::TransitionSubresources(Resource* pResource, CViewSubresourceSubset const & Subresources, D3D12_RESOURCE_STATES State, COMMAND_LIST_TYPE DestinationCommandListType, SubresourceTransitionFlags Flags) noexcept
{
    CDesiredResourceState::SubresourceInfo DesiredState = { State, DestinationCommandListType, Flags };
    ResourceStateManagerBase::TransitionSubresources(*pResource, Subresources, DesiredState);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Packs (State, DestinationCommandListType, Flags) into a SubresourceInfo and forwards to the
// base-class single-subresource request.
void ResourceStateManager::TransitionSubresource(Resource* pResource, UINT SubresourceIndex, D3D12_RESOURCE_STATES State, COMMAND_LIST_TYPE DestinationCommandListType, SubresourceTransitionFlags Flags) noexcept
{
    CDesiredResourceState::SubresourceInfo DesiredState = { State, DestinationCommandListType, Flags };
    ResourceStateManagerBase::TransitionSubresource(*pResource, SubresourceIndex, DesiredState);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Requests pre-draw transitions of the whole resource to the state(s) implied by its bindings, on the
// GRAPHICS command list type. One whole-resource request is made when the bindings are uniform,
// otherwise one request per subresource.
void ResourceStateManager::TransitionResourceForBindings(Resource* pResource) noexcept
{
    auto& Bindings = pResource->GetBindingState();
    if (Bindings.AreAllSubresourcesTheSame())
    {
        TransitionResource(pResource, Bindings.GetD3D12ResourceUsageFromBindings(0), COMMAND_LIST_TYPE::GRAPHICS, SubresourceTransitionFlags::TransitionPreDraw);
    }
    else
    {
        for (UINT i = 0; i < pResource->NumSubresources(); ++i)
        {
            TransitionSubresource(pResource, i, Bindings.GetD3D12ResourceUsageFromBindings(i), COMMAND_LIST_TYPE::GRAPHICS, SubresourceTransitionFlags::TransitionPreDraw);
        }
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Same as TransitionResourceForBindings, restricted to the given subresource subset when the bindings
// are not uniform. When the bindings are uniform the whole resource is transitioned regardless of the subset.
void ResourceStateManager::TransitionSubresourcesForBindings(Resource* pResource, CViewSubresourceSubset const & Subresources) noexcept
{
    auto& Bindings = pResource->GetBindingState();
    if (Bindings.AreAllSubresourcesTheSame())
    {
        TransitionResource(pResource, Bindings.GetD3D12ResourceUsageFromBindings(0), COMMAND_LIST_TYPE::GRAPHICS, SubresourceTransitionFlags::TransitionPreDraw);
    }
    else
    {
        for (auto range : Subresources)
        {
            for (UINT i = range.first; i < range.second; ++i)
            {
                TransitionSubresource(pResource, i, Bindings.GetD3D12ResourceUsageFromBindings(i), COMMAND_LIST_TYPE::GRAPHICS, SubresourceTransitionFlags::TransitionPreDraw);
            }
        }
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
void ResourceStateManager::ApplyAllResourceTransitions(bool bIsPreDraw) noexcept(false)
{
    CommandListManager* pCommandListManagers[(UINT)COMMAND_LIST_TYPE::MAX_VALID];
    UINT64 CurrentFenceValues[(UINT)COMMAND_LIST_TYPE::MAX_VALID];
    for (UINT i = 0; i < (UINT)COMMAND_LIST_TYPE::MAX_VALID; ++i)
    {
        pCommandListManagers[i] = m_ImmCtx.GetCommandListManager((COMMAND_LIST_TYPE)i);
        CurrentFenceValues[i] = pCommandListManagers[i] ?
pCommandListManagers[i]->GetCommandListID() : 1ull; } ApplyResourceTransitionsPreamble(); ForEachTransitioningResource([=](TransitionableResourceBase& ResourceBase) -> TransitionResult { Resource& CurResource = static_cast(ResourceBase); return ProcessTransitioningResource( CurResource.GetUnderlyingResource(), CurResource, CurResource.GetIdentity()->m_currentState, CurResource.GetBindingState(), CurResource.NumSubresources(), CurrentFenceValues, bIsPreDraw); // throw( bad_alloc ) }); SubmitResourceTransitions(pCommandListManagers); // throw( bad_alloc ) UINT64 NewCommandListID = 0ull; if (m_DestinationCommandListType != COMMAND_LIST_TYPE::UNKNOWN) { NewCommandListID = pCommandListManagers[(UINT)m_DestinationCommandListType]->GetCommandListID(); } PostSubmitUpdateState([=](PostApplyUpdate const& update, COMMAND_LIST_TYPE CmdListType, UINT64 FenceValue) { static_cast(update.AffectedResource).UsedInCommandList(CmdListType, FenceValue); m_ImmCtx.GetCommandListManager(CmdListType)->SetNeedSubmitFence(); }, CurrentFenceValues, NewCommandListID); } }; ================================================ FILE: src/RootSignature.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#include "pch.h"
#include <numeric> // std::accumulate

namespace D3D12TranslationLayer
{
    // Serializes the versioned root signature description to a blob and creates the
    // root signature from it. Throws (via ThrowFailure) if serialization fails.
    void RootSignatureBase::Create(D3D12_VERSIONED_ROOT_SIGNATURE_DESC const& rootDesc) noexcept(false)
    {
        CComPtr<ID3DBlob> spBlob;
        ThrowFailure(D3D12SerializeVersionedRootSignature(&rootDesc, &spBlob, NULL));
        Create(spBlob->GetBufferPointer(), spBlob->GetBufferSize());
    }

    // Destroys any existing object, then creates the root signature on the parent's
    // device from an already-serialized blob. Throws (via ThrowFailure) on failure.
    void RootSignatureBase::Create(const void* pBlob, SIZE_T BlobSize) noexcept(false)
    {
        Destroy();
        ThrowFailure(m_pParent->m_pDevice12->CreateRootSignature(m_pParent->GetNodeMask(), pBlob, BlobSize, IID_PPV_ARGS(GetForCreate())));
    }

    // Maps a shader stage to its D3D12 shader visibility. Any stage other than
    // VS/PS/GS/HS/DS maps to D3D12_SHADER_VISIBILITY_ALL.
    D3D12_SHADER_VISIBILITY GetShaderVisibility(EShaderStage stage)
    {
        switch (stage)
        {
        case e_VS: return D3D12_SHADER_VISIBILITY_VERTEX;
        case e_PS: return D3D12_SHADER_VISIBILITY_PIXEL;
        case e_GS: return D3D12_SHADER_VISIBILITY_GEOMETRY;
        case e_HS: return D3D12_SHADER_VISIBILITY_HULL;
        case e_DS: return D3D12_SHADER_VISIBILITY_DOMAIN;
        default: return D3D12_SHADER_VISIBILITY_ALL;
        }
    }

    // Fills 'Storage' with a version 1.1 root signature description for this desc.
    // Each shader stage contributes three descriptor-table root parameters, in order:
    // CBVs, SRVs, then samplers (or a dummy SRV table when the parent context is
    // compute-only). A single UAV table visible to all stages is appended last.
    void RootSignatureDesc::GetAsD3D12Desc(VersionedRootSignatureDescWithStorage& Storage, ImmediateContext* pParent) const
    {
        const bool bGraphics = (m_Flags & Compute) == 0;
        // NOTE(review): the template argument was reconstructed as the m_ShaderStages
        // array type - confirm against the original source.
        constexpr UINT MaxShaderStages = std::extent<decltype(m_ShaderStages)>::value;
        const UINT NumShaderStages = bGraphics ? MaxShaderStages : 1;

        UINT RangeIndex = 0;
        UINT ParameterIndex = 0;
        bool bCB14 = false;

        D3D12_DESCRIPTOR_RANGE_FLAGS RangeFlags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE;
        if (m_Flags & RequiresBufferOutOfBoundsHandling)
        {
            RangeFlags |= D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS;
        }
        // else STATIC (default)

        static constexpr D3D12_DESCRIPTOR_RANGE_FLAGS InterfacesRangeFlags =
            D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE | D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE;

        // SRV ranges live at the tail of the fixed DescriptorRanges array unless more than
        // MaxShaderStages are needed, in which case they go in the resizable vector.
        UINT NumSRVRanges = std::accumulate(m_NumSRVSpacesUsed, std::end(m_NumSRVSpacesUsed), 0);
        CD3DX12_DESCRIPTOR_RANGE1* pSRVRanges;
        if (NumSRVRanges > MaxShaderStages)
        {
            Storage.InterfacesSRVRanges.resize(NumSRVRanges);
            pSRVRanges = Storage.InterfacesSRVRanges.data();
        }
        else
        {
            pSRVRanges = std::end(Storage.DescriptorRanges) - NumSRVRanges;
        }

        for (UINT i = 0; i < NumShaderStages; ++i)
        {
            ASSUME(bGraphics || i == 0);
            EShaderStage StageEnum = bGraphics ? (EShaderStage)i : e_CS;
            auto Visibility = GetShaderVisibility(StageEnum);
            ShaderStage const& Stage = m_ShaderStages[i];
            bCB14 |= Stage.IsCB14();

            // CBV table: space 0, plus a second range in space 1 when interfaces are used.
            Storage.Parameter[ParameterIndex].InitAsDescriptorTable(1, &Storage.DescriptorRanges[RangeIndex], Visibility);
            Storage.DescriptorRanges[RangeIndex++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, Stage.GetCBBindingCount(), 0, 0, RangeFlags);
            if (Stage.m_UsesShaderInterfaces)
            {
                Storage.DescriptorRanges[RangeIndex++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, D3D12_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT, 0, 1, InterfacesRangeFlags, 0);
                Storage.Parameter[ParameterIndex].DescriptorTable.NumDescriptorRanges++;
            }
            ++ParameterIndex;

            // SRV table: one range per register space in use; only space 0 gets RangeFlags.
            Storage.Parameter[ParameterIndex].InitAsDescriptorTable(m_NumSRVSpacesUsed[i], pSRVRanges, Visibility);
            for (UINT range = 0; range < m_NumSRVSpacesUsed[i]; ++range)
            {
                pSRVRanges->Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, Stage.GetSRVBindingCount(), 0, range, range == 0 ? RangeFlags : InterfacesRangeFlags, 0);
                ++pSRVRanges;
            }
            ++ParameterIndex;

            if (pParent->ComputeOnly())
            {
                // Dummy descriptor range just to make the root parameter constants line up
                Storage.Parameter[ParameterIndex].InitAsDescriptorTable(1, &Storage.DescriptorRanges[RangeIndex], Visibility);
                Storage.DescriptorRanges[RangeIndex++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0, m_NumSRVSpacesUsed[i] + 1, RangeFlags);
            }
            else
            {
                // Sampler table: space 0, plus spaces 1 and 2 when interfaces are used.
                Storage.Parameter[ParameterIndex].InitAsDescriptorTable(1, &Storage.DescriptorRanges[RangeIndex], Visibility);
                Storage.DescriptorRanges[RangeIndex++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, Stage.GetSamplerBindingCount(), 0, 0, D3D12_DESCRIPTOR_RANGE_FLAG_NONE);
                if (Stage.m_UsesShaderInterfaces)
                {
                    Storage.DescriptorRanges[RangeIndex++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, D3D12_COMMONSHADER_SAMPLER_SLOT_COUNT, 0, 1, D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE, 0);
                    Storage.DescriptorRanges[RangeIndex++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, D3D12_COMMONSHADER_SAMPLER_SLOT_COUNT, 0, 2, D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE, 0);
                    Storage.Parameter[ParameterIndex].DescriptorTable.NumDescriptorRanges += 2;
                }
            }
            ++ParameterIndex;
        }

        // Single UAV table shared by all stages.
        Storage.Parameter[ParameterIndex].InitAsDescriptorTable(1, &Storage.DescriptorRanges[RangeIndex], D3D12_SHADER_VISIBILITY_ALL);
        Storage.DescriptorRanges[RangeIndex++].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, GetUAVBindingCount(), 0, 0, RangeFlags);
        ++ParameterIndex;

        assert(ParameterIndex <= VersionedRootSignatureDescWithStorage::c_NumParameters);
        assert(RangeIndex <= VersionedRootSignatureDescWithStorage::c_NumParameters + VersionedRootSignatureDescWithStorage::c_NumExtraRangesForInterfaces);

        // Compute root signatures get no base flags; graphics ones allow IA input layout
        // and stream output. The low-tier CB limit flag is added if any stage reports CB14.
        const D3D12_ROOT_SIGNATURE_FLAGS BaseFlags =
            !bGraphics ? D3D12_ROOT_SIGNATURE_FLAG_NONE :
            D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT | D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT;
        D3D12_ROOT_SIGNATURE_FLAGS Flags = BaseFlags |
            (bCB14 ? ROOT_SIGNATURE_FLAG_ALLOW_LOW_TIER_RESERVED_HW_CB_LIMIT : D3D12_ROOT_SIGNATURE_FLAG_NONE);
        Storage.RootDesc.Init_1_1(ParameterIndex, Storage.Parameter, 0, NULL, Flags);
    }
};

================================================
FILE: src/Shader.cpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include "pch.h"

namespace D3D12TranslationLayer
{
    // Takes ownership of both the DXBC buffer (moved in) and the DXIL allocation
    // (detached from the caller's pointer); m_Desc refers to the DXIL.
    // NOTE(review): the template arguments of std::unique_ptr / CComHeapPtr were
    // reconstructed - confirm against the declaration in Shader.hpp.
    Shader::Shader(ImmediateContext* pParent, std::unique_ptr<BYTE[]> DXBC, CComHeapPtr<void>& DXIL, SIZE_T dxilSize, SShaderDecls PrecomputedDecls)
        : DeviceChild(pParent)
        , SShaderDecls(std::move(PrecomputedDecls))
        , m_ByteCode(std::move(DXBC))
        , m_Dxil(DXIL.Detach())
        , m_Desc({ m_Dxil, dxilSize })
    {
    }

    // Wraps caller-provided bytecode: only the pointer and size are stored in m_Desc,
    // no copy is made by this constructor.
    Shader::Shader(ImmediateContext* pParent, const void* byteCode, SIZE_T bytecodeSize, SShaderDecls PrecomputedDecls)
        : DeviceChild(pParent)
        , SShaderDecls(std::move(PrecomputedDecls))
        , m_Desc({ byteCode, bytecodeSize })
    {
    }
};

================================================
FILE: src/ShaderBinary.cpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include "pch.h"
// NOTE(review): the header name was lost in extraction; ShaderBinary.h is the matching
// header in this repository's include/ directory - confirm.
#include <ShaderBinary.h>

/*==========================================================================;
 *
 *  D3D10ShaderBinary namespace
 *
 ***************************************************************************/

namespace D3D10ShaderBinary
{

// Returns whether OpCode is below D3D10_SB_NUM_OPCODES, i.e. a valid index into g_InstructionInfo.
BOOL IsOpCodeValid(D3D10_SB_OPCODE_TYPE OpCode)
{
    return OpCode < D3D10_SB_NUM_OPCODES;
}

// Returns the operand count registered for OpCode in g_InstructionInfo.
// Throws E_FAIL when OpCode is out of range.
UINT GetNumInstructionOperands(D3D10_SB_OPCODE_TYPE OpCode)
{
    if (IsOpCodeValid(OpCode))
        return g_InstructionInfo[OpCode].m_NumOperands;
    else
        throw E_FAIL;
}

// Per-opcode metadata table, filled in by InitInstructionInfo().
CInstructionInfo g_InstructionInfo[D3D10_SB_NUM_OPCODES];
bool static g_bInstructionInfoInited = false;

// Populates g_InstructionInfo with the name, operand count, precise mask and class of
// every opcode listed below. Only the first call does any work; later calls return early.
void InitInstructionInfo()
{
#define SET(OpCode, Name, NumOperands, PrecMask, OpClass) \
    (g_InstructionInfo[OpCode].Set(NumOperands, Name, OpClass, PrecMask))

    if (g_bInstructionInfoInited)
        return;

    SET (D3D10_SB_OPCODE_ADD, "add", 3, 0x06, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_AND, "and", 3, 0x06, D3D10_SB_BIT_OP);
    SET (D3D10_SB_OPCODE_BREAK, "break", 0, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_BREAKC, "breakc", 1, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_CALL, "call", 1, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_CALLC, "callc", 2, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_CONTINUE, "continue", 0, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_CONTINUEC, "continuec", 1, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_CASE, "case", 1, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_CUT, "cut", 0, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_DEFAULT, "default", 0, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_DISCARD, "discard", 1, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_DIV, "div", 3, 0x06, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_DP2, "dp2", 3, 0x06, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_DP3, "dp3", 3, 0x06, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_DP4, "dp4", 3, 0x06, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_ELSE, "else", 0, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_EMIT, "emit", 0, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_EMITTHENCUT, "emit_then_cut", 0, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_ENDIF, "endif", 0, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_ENDLOOP, "endloop", 0, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_ENDSWITCH, "endswitch", 0, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_EQ, "eq", 3, 0x00, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_EXP, "exp", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_FRC, "frc", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_FTOI, "ftoi", 2, 0x00, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_FTOU, "ftou", 2, 0x00, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_GE, "ge", 3, 0x00, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_DERIV_RTX, "deriv_rtx", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_DERIV_RTY, "deriv_rty", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_IADD, "iadd", 3, 0x06, D3D10_SB_INT_OP);
    SET (D3D10_SB_OPCODE_IF, "if", 1, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_IEQ, "ieq", 3, 0x00, D3D10_SB_INT_OP);
    SET (D3D10_SB_OPCODE_IGE, "ige", 3, 0x00, D3D10_SB_INT_OP);
    SET (D3D10_SB_OPCODE_ILT, "ilt", 3, 0x00, D3D10_SB_INT_OP);
    SET (D3D10_SB_OPCODE_IMAD, "imad", 4, 0x0e, D3D10_SB_INT_OP);
    SET (D3D10_SB_OPCODE_IMAX, "imax", 3, 0x06, D3D10_SB_INT_OP);
    SET (D3D10_SB_OPCODE_IMIN, "imin", 3, 0x06, D3D10_SB_INT_OP);
    SET (D3D10_SB_OPCODE_IMUL, "imul", 4, 0x0c, D3D10_SB_INT_OP);
    SET (D3D10_SB_OPCODE_INE, "ine", 3, 0x00, D3D10_SB_INT_OP);
    SET (D3D10_SB_OPCODE_INEG, "ineg", 2, 0x02, D3D10_SB_INT_OP);
    SET (D3D10_SB_OPCODE_ISHL, "ishl", 3, 0x02, D3D10_SB_INT_OP);
    SET (D3D10_SB_OPCODE_ISHR, "ishr", 3, 0x02, D3D10_SB_INT_OP);
    SET (D3D10_SB_OPCODE_ITOF, "itof", 2, 0x00, D3D10_SB_INT_OP);
    SET (D3D10_SB_OPCODE_LABEL, "label", 1, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_LD, "ld", 3, 0x00, D3D10_SB_TEX_OP);
    SET (D3D10_SB_OPCODE_LD_MS, "ldms", 4, 0x00, D3D10_SB_TEX_OP);
    SET (D3D10_SB_OPCODE_LOG, "log", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_LOOP, "loop", 0, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_LT, "lt", 3, 0x00, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_MAD, "mad", 4, 0x0e, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_MAX, "max", 3, 0x06, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_MIN, "min", 3, 0x06, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_MOV, "mov", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_MOVC, "movc", 4, 0x0c, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_MUL, "mul", 3, 0x06, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_NE, "ne", 3, 0x00, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_NOP, "nop", 0, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_NOT, "not", 2, 0x02, D3D10_SB_BIT_OP);
    SET (D3D10_SB_OPCODE_OR, "or", 3, 0x06, D3D10_SB_BIT_OP);
    SET (D3D10_SB_OPCODE_RESINFO, "resinfo", 3, 0x00, D3D10_SB_TEX_OP);
    SET (D3D10_SB_OPCODE_RET, "ret", 0, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_RETC, "retc", 1, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_ROUND_NE, "round_ne", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_ROUND_NI, "round_ni", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_ROUND_PI, "round_pi", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_ROUND_Z, "round_z", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_RSQ, "rsq", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_SAMPLE, "sample", 4, 0x00, D3D10_SB_TEX_OP);
    SET (D3D10_SB_OPCODE_SAMPLE_B, "sample_b", 5, 0x00, D3D10_SB_TEX_OP);
    SET (D3D10_SB_OPCODE_SAMPLE_L, "sample_l", 5, 0x00, D3D10_SB_TEX_OP);
    SET (D3D10_SB_OPCODE_SAMPLE_D, "sample_d", 6, 0x00, D3D10_SB_TEX_OP);
    SET (D3D10_SB_OPCODE_SAMPLE_C, "sample_c", 5, 0x00, D3D10_SB_TEX_OP);
    SET (D3D10_SB_OPCODE_SAMPLE_C_LZ, "sample_c_lz", 5, 0x00, D3D10_SB_TEX_OP);
    SET (D3D10_SB_OPCODE_SQRT, "sqrt", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_SWITCH, "switch", 1, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_SINCOS, "sincos", 3, 0x04, D3D10_SB_FLOAT_OP);
    SET (D3D10_SB_OPCODE_UDIV, "udiv", 4, 0x0c, D3D10_SB_UINT_OP);
    SET (D3D10_SB_OPCODE_ULT, "ult", 3, 0x00, D3D10_SB_UINT_OP);
    SET (D3D10_SB_OPCODE_UGE, "uge", 3, 0x00, D3D10_SB_UINT_OP);
    SET (D3D10_SB_OPCODE_UMAX, "umax", 3, 0x06, D3D10_SB_UINT_OP);
    SET (D3D10_SB_OPCODE_UMIN, "umin", 3, 0x06, D3D10_SB_UINT_OP);
    SET (D3D10_SB_OPCODE_UMUL, "umul", 4, 0x0c, D3D10_SB_UINT_OP);
    SET (D3D10_SB_OPCODE_UMAD, "umad", 4, 0x0e, D3D10_SB_UINT_OP);
    SET (D3D10_SB_OPCODE_USHR, "ushr", 3, 0x02, D3D10_SB_UINT_OP);
    SET (D3D10_SB_OPCODE_UTOF, "utof", 2, 0x00, D3D10_SB_UINT_OP);
    SET (D3D10_SB_OPCODE_XOR, "xor", 3, 0x06, D3D10_SB_BIT_OP);
    SET (D3D10_SB_OPCODE_RESERVED0, "jmp", 0, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D10_SB_OPCODE_DCL_INPUT, "dcl_input", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_OUTPUT, "dcl_output", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_INPUT_SGV, "dcl_input_sgv", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_INPUT_PS_SGV, "dcl_input_ps_sgv", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE, "dcl_inputprimitive", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, "dcl_outputtopology", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT, "dcl_maxout", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_INPUT_PS, "dcl_input_ps", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER, "dcl_constantbuffer", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_SAMPLER, "dcl_sampler", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_RESOURCE, "dcl_resource", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_INPUT_SIV, "dcl_input_siv", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_INPUT_PS_SIV, "dcl_input_ps_siv", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_OUTPUT_SIV, "dcl_output_siv", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_OUTPUT_SGV, "dcl_output_sgv", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_TEMPS, "dcl_temps", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP, "dcl_indexableTemp", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_INDEX_RANGE, "dcl_indexrange", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS, "dcl_globalFlags", 0, 0x00, D3D10_SB_DCL_OP);

    SET (D3D10_1_SB_OPCODE_SAMPLE_INFO, "sampleinfo", 2, 0x00, D3D10_SB_TEX_OP);
    SET (D3D10_1_SB_OPCODE_SAMPLE_POS, "samplepos", 3, 0x00, D3D10_SB_TEX_OP);
    SET (D3D10_1_SB_OPCODE_GATHER4, "gather4", 4, 0x00, D3D10_SB_TEX_OP);
    SET (D3D10_1_SB_OPCODE_LOD, "lod", 4, 0x00, D3D10_SB_TEX_OP);

    SET (D3D11_SB_OPCODE_EMIT_STREAM, "emit_stream", 1, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D11_SB_OPCODE_CUT_STREAM, "cut_stream", 1, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D11_SB_OPCODE_EMITTHENCUT_STREAM, "emit_then_cut_stream", 1, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D11_SB_OPCODE_INTERFACE_CALL, "fcall", 1, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D11_SB_OPCODE_DCL_STREAM, "dcl_stream", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_FUNCTION_BODY, "dcl_function_body", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_FUNCTION_TABLE, "dcl_function_table", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_INTERFACE, "dcl_interface", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_BUFINFO, "bufinfo", 2, 0x00, D3D10_SB_TEX_OP);
    SET (D3D11_SB_OPCODE_DERIV_RTX_COARSE, "deriv_rtx_coarse", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D11_SB_OPCODE_DERIV_RTX_FINE, "deriv_rtx_fine", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D11_SB_OPCODE_DERIV_RTY_COARSE, "deriv_rty_coarse", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D11_SB_OPCODE_DERIV_RTY_FINE, "deriv_rty_fine", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D11_SB_OPCODE_GATHER4_C, "gather4_c", 5, 0x00, D3D10_SB_TEX_OP);
    SET (D3D11_SB_OPCODE_GATHER4_PO, "gather4_po", 5, 0x00, D3D10_SB_TEX_OP);
    SET (D3D11_SB_OPCODE_GATHER4_PO_C, "gather4_po_c", 6, 0x00, D3D10_SB_TEX_OP);
    SET (D3D11_SB_OPCODE_RCP, "rcp", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D11_SB_OPCODE_F32TOF16, "f32tof16", 2, 0x00, D3D10_SB_FLOAT_OP);
    SET (D3D11_SB_OPCODE_F16TOF32, "f16tof32", 2, 0x00, D3D10_SB_FLOAT_OP);
    SET (D3D11_SB_OPCODE_UADDC, "uaddc", 4, 0x0c, D3D10_SB_UINT_OP);
    SET (D3D11_SB_OPCODE_USUBB, "usubb", 4, 0x0c, D3D10_SB_UINT_OP);
    SET (D3D11_SB_OPCODE_COUNTBITS, "countbits", 2, 0x02, D3D10_SB_BIT_OP);
    SET (D3D11_SB_OPCODE_FIRSTBIT_HI, "firstbit_hi", 2, 0x02, D3D10_SB_BIT_OP);
    SET (D3D11_SB_OPCODE_FIRSTBIT_LO, "firstbit_lo", 2, 0x02, D3D10_SB_BIT_OP);
    SET (D3D11_SB_OPCODE_FIRSTBIT_SHI, "firstbit_shi", 2, 0x02, D3D10_SB_BIT_OP);
    SET (D3D11_SB_OPCODE_UBFE, "ubfe", 4, 0x02, D3D10_SB_BIT_OP);
    SET (D3D11_SB_OPCODE_IBFE, "ibfe", 4, 0x02, D3D10_SB_BIT_OP);
    SET (D3D11_SB_OPCODE_BFI, "bfi", 5, 0x02, D3D10_SB_BIT_OP);
    SET (D3D11_SB_OPCODE_BFREV, "bfrev", 2, 0x02, D3D10_SB_BIT_OP);
    SET (D3D11_SB_OPCODE_SWAPC, "swapc", 5, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D11_SB_OPCODE_HS_DECLS, "hs_decls", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_HS_CONTROL_POINT_PHASE, "hs_control_point_phase", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_HS_FORK_PHASE, "hs_fork_phase", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_HS_JOIN_PHASE, "hs_join_phase", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT, "dcl_input_control_point_count", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT, "dcl_output_control_point_count", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_TESS_DOMAIN, "dcl_tessellator_domain", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_TESS_PARTITIONING, "dcl_tessellator_partitioning", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE, "dcl_tessellator_output_primitive", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_HS_MAX_TESSFACTOR, "dcl_hs_max_tessfactor", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT, "dcl_hs_fork_phase_instance_count", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, "dcl_hs_join_phase_instance_count", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_THREAD_GROUP, "dcl_thread_group", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED, "dcl_uav_typed", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW, "dcl_uav_raw", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED, "dcl_uav_structured", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW, "dcl_tgsm_raw", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED, "dcl_tgsm_structured", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_RESOURCE_RAW, "dcl_resource_raw", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED, "dcl_resource_structured", 1, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_LD_UAV_TYPED, "ld_uav_typed", 3, 0x00, D3D11_SB_MEM_OP);
    SET (D3D11_SB_OPCODE_STORE_UAV_TYPED, "store_uav_typed", 3, 0x00, D3D11_SB_MEM_OP);
    SET (D3D11_SB_OPCODE_LD_RAW, "ld_raw", 3, 0x00, D3D11_SB_MEM_OP);
    SET (D3D11_SB_OPCODE_STORE_RAW, "store_raw", 3, 0x00, D3D11_SB_MEM_OP);
    SET (D3D11_SB_OPCODE_LD_STRUCTURED, "ld_structured", 4, 0x00, D3D11_SB_MEM_OP);
    SET (D3D11_SB_OPCODE_STORE_STRUCTURED, "store_structured", 4, 0x00, D3D11_SB_MEM_OP);
    SET (D3D11_SB_OPCODE_ATOMIC_AND, "atomic_and", 3, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_ATOMIC_OR, "atomic_or", 3, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_ATOMIC_XOR, "atomic_xor", 3, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_ATOMIC_CMP_STORE, "atomic_cmp_store", 4, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_ATOMIC_IADD, "atomic_iadd", 3, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_ATOMIC_IMAX, "atomic_imax", 3, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_ATOMIC_IMIN, "atomic_imin", 3, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_ATOMIC_UMAX, "atomic_umax", 3, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_ATOMIC_UMIN, "atomic_umin", 3, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_IMM_ATOMIC_ALLOC, "imm_atomic_alloc", 2, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_IMM_ATOMIC_CONSUME, "imm_atomic_consume", 2, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_IMM_ATOMIC_IADD, "imm_atomic_iadd", 4, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_IMM_ATOMIC_AND, "imm_atomic_and", 4, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_IMM_ATOMIC_OR, "imm_atomic_or", 4, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_IMM_ATOMIC_XOR, "imm_atomic_xor", 4, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_IMM_ATOMIC_EXCH, "imm_atomic_exch", 4, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_IMM_ATOMIC_CMP_EXCH, "imm_atomic_cmp_exch", 5, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_IMM_ATOMIC_IMAX, "imm_atomic_imax", 4, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_IMM_ATOMIC_IMIN, "imm_atomic_imin", 4, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_IMM_ATOMIC_UMAX, "imm_atomic_umax", 4, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_IMM_ATOMIC_UMIN, "imm_atomic_umin", 4, 0x00, D3D11_SB_ATOMIC_OP);
    SET (D3D11_SB_OPCODE_SYNC, "sync", 0, 0x00, D3D10_SB_FLOW_OP);
    SET (D3D11_SB_OPCODE_EVAL_SNAPPED, "eval_snapped", 3, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D11_SB_OPCODE_EVAL_SAMPLE_INDEX, "eval_sample_index", 3, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D11_SB_OPCODE_EVAL_CENTROID, "eval_centroid", 2, 0x02, D3D10_SB_FLOAT_OP);
    SET (D3D11_SB_OPCODE_DCL_GS_INSTANCE_COUNT, "dcl_gsinstances", 0, 0x00, D3D10_SB_DCL_OP);
    SET (D3D11_SB_OPCODE_DADD, "dadd", 3, 0x06, D3D11_SB_DOUBLE_OP);
    SET (D3D11_SB_OPCODE_DMAX, "dmax", 3, 0x06, D3D11_SB_DOUBLE_OP);
    SET (D3D11_SB_OPCODE_DMIN, "dmin", 3, 0x06, D3D11_SB_DOUBLE_OP);
    SET (D3D11_SB_OPCODE_DMUL, "dmul", 3, 0x06, D3D11_SB_DOUBLE_OP);
    SET (D3D11_SB_OPCODE_DEQ, "deq", 3, 0x00, D3D11_SB_DOUBLE_OP);
    SET (D3D11_SB_OPCODE_DGE, "dge", 3, 0x00, D3D11_SB_DOUBLE_OP);
    SET (D3D11_SB_OPCODE_DLT, "dlt", 3, 0x00, D3D11_SB_DOUBLE_OP);
    SET (D3D11_SB_OPCODE_DNE, "dne", 3, 0x00, D3D11_SB_DOUBLE_OP);
    SET (D3D11_SB_OPCODE_DMOV, "dmov", 2, 0x02, D3D11_SB_DOUBLE_OP);
    SET (D3D11_SB_OPCODE_DMOVC, "dmovc", 4, 0x0c, D3D11_SB_DOUBLE_OP);
    SET (D3D11_SB_OPCODE_DTOF, "dtof", 2, 0x02, D3D11_SB_DOUBLE_TO_FLOAT_OP);
    SET (D3D11_SB_OPCODE_FTOD, "ftod", 2, 0x00, D3D11_SB_FLOAT_TO_DOUBLE_OP);
    SET (D3D11_SB_OPCODE_ABORT, "abort", 0, 0x00, D3D11_SB_DEBUG_OP);
    SET (D3D11_SB_OPCODE_DEBUG_BREAK, "debug_break", 0, 0x00, D3D11_SB_DEBUG_OP);

    SET (D3D11_1_SB_OPCODE_DDIV, "ddiv", 3, 0x06, D3D11_SB_DOUBLE_OP);
    SET (D3D11_1_SB_OPCODE_DFMA, "dfma", 4, 0x0e, D3D11_SB_DOUBLE_OP);
    SET (D3D11_1_SB_OPCODE_DRCP, "drcp", 2, 0x02, D3D11_SB_DOUBLE_OP);
    SET (D3D11_1_SB_OPCODE_MSAD, "msad", 4, 0x0e, D3D10_SB_UINT_OP);
    SET (D3D11_1_SB_OPCODE_DTOI, "dtoi", 2, 0x00, D3D11_SB_DOUBLE_OP);
    SET (D3D11_1_SB_OPCODE_DTOU, "dtou", 2, 0x00, D3D11_SB_DOUBLE_OP);
    SET (D3D11_1_SB_OPCODE_ITOD, "itod", 2, 0x00, D3D10_SB_INT_OP);
    SET (D3D11_1_SB_OPCODE_UTOD, "utod", 2, 0x00, D3D10_SB_UINT_OP);

    SET (D3DWDDM1_3_SB_OPCODE_GATHER4_FEEDBACK, "gather4_s", 5, 0x00, D3D10_SB_TEX_OP);
    SET (D3DWDDM1_3_SB_OPCODE_GATHER4_C_FEEDBACK, "gather4_c_s", 6, 0x00, D3D10_SB_TEX_OP);
    SET (D3DWDDM1_3_SB_OPCODE_GATHER4_PO_FEEDBACK, "gather4_po_s", 6, 0x00, D3D10_SB_TEX_OP);
    SET (D3DWDDM1_3_SB_OPCODE_GATHER4_PO_C_FEEDBACK, "gather4_po_c_s", 7, 0x00, D3D10_SB_TEX_OP);
    SET (D3DWDDM1_3_SB_OPCODE_LD_FEEDBACK, "ld_s", 4, 0x00, D3D10_SB_TEX_OP);
    SET (D3DWDDM1_3_SB_OPCODE_LD_MS_FEEDBACK, "ldms_s", 5, 0x00, D3D10_SB_TEX_OP);
    SET (D3DWDDM1_3_SB_OPCODE_LD_UAV_TYPED_FEEDBACK, "ld_uav_typed_s", 4, 0x00, D3D11_SB_MEM_OP);
    SET (D3DWDDM1_3_SB_OPCODE_LD_RAW_FEEDBACK, "ld_raw_s", 4, 0x00, D3D11_SB_MEM_OP);
    SET (D3DWDDM1_3_SB_OPCODE_LD_STRUCTURED_FEEDBACK, "ld_structured_s", 5, 0x00, D3D11_SB_MEM_OP);
    SET (D3DWDDM1_3_SB_OPCODE_SAMPLE_L_FEEDBACK, "sample_l_s", 6, 0x00, D3D10_SB_TEX_OP);
    SET (D3DWDDM1_3_SB_OPCODE_SAMPLE_C_LZ_FEEDBACK, "sample_c_lz_s", 6, 0x00, D3D10_SB_TEX_OP);
    SET (D3DWDDM1_3_SB_OPCODE_SAMPLE_CLAMP_FEEDBACK, "sample_cl_s", 6, 0x00, D3D10_SB_TEX_OP);
    SET (D3DWDDM1_3_SB_OPCODE_SAMPLE_B_CLAMP_FEEDBACK, "sample_b_cl_s", 7, 0x00, D3D10_SB_TEX_OP);
    SET (D3DWDDM1_3_SB_OPCODE_SAMPLE_D_CLAMP_FEEDBACK, "sample_d_cl_s", 8, 0x00, D3D10_SB_TEX_OP);
    SET (D3DWDDM1_3_SB_OPCODE_SAMPLE_C_CLAMP_FEEDBACK, "sample_c_cl_s", 7, 0x00, D3D10_SB_TEX_OP);
    SET (D3DWDDM1_3_SB_OPCODE_CHECK_ACCESS_FULLY_MAPPED, "check_access_fully_mapped", 2, 0x00, D3D10_SB_TEX_OP);

    g_bInstructionInfoInited = true;
}

//*****************************************************************************
//
// CShaderCodeParser
//
//*****************************************************************************

// Points the parser at a tokenized shader. Token 1 is used as the total length in
// tokens (end = pBuffer + pBuffer[1]); parsing starts at token 2.
void CShaderCodeParser::SetShader(CONST CShaderToken* pBuffer)
{
    m_pShaderCode = (CShaderToken*)pBuffer;
    m_pShaderEndToken = (CShaderToken*)pBuffer + pBuffer[1];
    // First OpCode token
    m_pCurrentToken = (CShaderToken*)&pBuffer[2];
}

// Decodes the program type from the first token of the shader.
D3D10_SB_TOKENIZED_PROGRAM_TYPE CShaderCodeParser::ShaderType()
{
    return (D3D10_SB_TOKENIZED_PROGRAM_TYPE)DECODE_D3D10_SB_TOKENIZED_PROGRAM_TYPE(*m_pShaderCode);
}

// Returns the read position as a token offset from the start of the shader.
UINT CShaderCodeParser::CurrentTokenOffset()
{
    return (UINT)(m_pCurrentToken - m_pShaderCode);
}

// Returns the value of token 1, which SetShader treats as the shader length in tokens.
UINT CShaderCodeParser::ShaderLengthInTokens()
{
    return m_pShaderCode[1];
}

// Decodes the minor version from the first token of the shader.
UINT CShaderCodeParser::ShaderMinorVersion()
{
    return DECODE_D3D10_SB_TOKENIZED_PROGRAM_MINOR_VERSION(m_pShaderCode[0]);
}

// Decodes the major version from the first token of the shader.
UINT CShaderCodeParser::ShaderMajorVersion()
{
    return DECODE_D3D10_SB_TOKENIZED_PROGRAM_MAJOR_VERSION(m_pShaderCode[0]);
}

// Reads one operand index of the given representation from the token stream into
// *pOperandIndex, advancing the read position. Immediate forms consume one or two
// tokens; relative forms parse a nested operand and copy its register information.
// Throws E_FAIL for any other representation.
void CShaderCodeParser::ParseIndex(COperandIndex* pOperandIndex, D3D10_SB_OPERAND_INDEX_REPRESENTATION IndexType)
{
    switch (IndexType)
    {
    case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
        pOperandIndex->m_RegIndex = *m_pCurrentToken++;
        pOperandIndex->m_ComponentName = D3D10_SB_4_COMPONENT_X;
        pOperandIndex->m_RelRegType = D3D10_SB_OPERAND_TYPE_IMMEDIATE32;
        break;
    case D3D10_SB_OPERAND_INDEX_IMMEDIATE64:
        pOperandIndex->m_RegIndexA[0] = *m_pCurrentToken++;
        pOperandIndex->m_RegIndexA[1] = *m_pCurrentToken++;
        pOperandIndex->m_ComponentName = D3D10_SB_4_COMPONENT_X;
        pOperandIndex->m_RelRegType = D3D10_SB_OPERAND_TYPE_IMMEDIATE64;
        break;
    case D3D10_SB_OPERAND_INDEX_RELATIVE:
        {
            COperand operand;
            ParseOperand(&operand);
            pOperandIndex->m_RelIndex = operand.m_Index[0].m_RegIndex;
            pOperandIndex->m_RelIndex1 = operand.m_Index[1].m_RegIndex;
            pOperandIndex->m_RelRegType = operand.m_Type;
            pOperandIndex->m_IndexDimension = operand.m_IndexDimension;
            pOperandIndex->m_ComponentName = operand.m_ComponentName;
            pOperandIndex->m_MinPrecision = operand.m_MinPrecision;
            break;
        }
    case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
        {
            // The immediate part comes first, followed by the relative operand.
            pOperandIndex->m_RegIndex = *m_pCurrentToken++;
            COperand operand;
            ParseOperand(&operand);
            pOperandIndex->m_RelIndex = operand.m_Index[0].m_RegIndex;
            pOperandIndex->m_RelIndex1 = operand.m_Index[1].m_RegIndex;
            pOperandIndex->m_RelRegType = operand.m_Type;
            pOperandIndex->m_IndexDimension = operand.m_IndexDimension;
            pOperandIndex->m_ComponentName = operand.m_ComponentName;
            pOperandIndex->m_MinPrecision = operand.m_MinPrecision;
        }
        break;
    default:
        throw E_FAIL;
    }
}

// Reads one operand from the token stream into *pOperand, advancing the read position.
// Order of consumption: operand token, optional extended-operand token, immediate
// values (immediate operand types only), then one index per index dimension.
// Throws E_FAIL on an unknown 4-component selection mode or index representation.
void CShaderCodeParser::ParseOperand(COperandBase* pOperand)
{
    CShaderToken Token = *m_pCurrentToken++;

    pOperand->m_Type = DECODE_D3D10_SB_OPERAND_TYPE(Token);
    pOperand->m_NumComponents = DECODE_D3D10_SB_OPERAND_NUM_COMPONENTS(Token);
    pOperand->m_bExtendedOperand = DECODE_IS_D3D10_SB_OPERAND_EXTENDED(Token);

    // Number of immediate values to read later; stays 0 for any other component count.
    UINT NumComponents = 0;
    switch (pOperand->m_NumComponents)
    {
    case D3D10_SB_OPERAND_1_COMPONENT:
        NumComponents = 1;
        break;
    case D3D10_SB_OPERAND_4_COMPONENT:
        NumComponents = 4;
        break;
    }

    switch (pOperand->m_Type)
    {
    case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
    case D3D10_SB_OPERAND_TYPE_IMMEDIATE64:
        // NOTE(review): m_IndexDimension is not assigned on this path, so the index
        // loop at the end of this function uses whatever value the operand already holds.
        break;
    default:
        {
            if (pOperand->m_NumComponents == D3D10_SB_OPERAND_4_COMPONENT)
            {
                // Component selection mode
                pOperand->m_ComponentSelection = DECODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(Token);
                switch(pOperand->m_ComponentSelection)
                {
                case D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE:
                    pOperand->m_WriteMask = DECODE_D3D10_SB_OPERAND_4_COMPONENT_MASK(Token);
                    break;
                case D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE:
                    pOperand->m_Swizzle[0] = DECODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SOURCE(Token, 0);
                    pOperand->m_Swizzle[1] = DECODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SOURCE(Token, 1);
                    pOperand->m_Swizzle[2] = DECODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SOURCE(Token, 2);
                    pOperand->m_Swizzle[3] = DECODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_SOURCE(Token, 3);
                    break;
                case D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE:
                    {
                        // A single selected component is replicated into all four swizzle slots.
                        D3D10_SB_4_COMPONENT_NAME Component = DECODE_D3D10_SB_OPERAND_4_COMPONENT_SELECT_1(Token);
                        pOperand->m_Swizzle[0] = (BYTE)Component;
                        pOperand->m_Swizzle[1] = (BYTE)Component;
                        pOperand->m_Swizzle[2] = (BYTE)Component;
                        pOperand->m_Swizzle[3] = (BYTE)Component;
                        pOperand->m_ComponentName = Component;
                        break;
                    }
                default:
                    throw E_FAIL;
                }
            }
            pOperand->m_IndexDimension = DECODE_D3D10_SB_OPERAND_INDEX_DIMENSION(Token);
            if (pOperand->m_IndexDimension != D3D10_SB_OPERAND_INDEX_0D)
            {
                UINT NumDimensions = pOperand->m_IndexDimension;
                // Index representation
                for (UINT i=0; i < NumDimensions; i++)
                {
                    pOperand->m_IndexType[i] = DECODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(i, Token);
                }
            }
            break;
        }
    }

    // Extended operand
    if (pOperand->m_bExtendedOperand)
    {
        Token = *m_pCurrentToken++;
        pOperand->m_ExtendedOperandType = DECODE_D3D10_SB_EXTENDED_OPERAND_TYPE(Token);
        if (pOperand->m_ExtendedOperandType == D3D10_SB_EXTENDED_OPERAND_MODIFIER)
        {
            pOperand->m_Modifier = DECODE_D3D10_SB_OPERAND_MODIFIER(Token);
            pOperand->m_MinPrecision = DECODE_D3D11_SB_OPERAND_MIN_PRECISION(Token);
        }
    }

    // Immediate operands carry one value token per component.
    switch( pOperand->m_Type )
    {
    case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
    case D3D10_SB_OPERAND_TYPE_IMMEDIATE64:
        for (UINT i=0 ; i < NumComponents; i++)
        {
            pOperand->m_Value[i] = *m_pCurrentToken++;
        }
        break;
    }

    // Operand indices
    if (pOperand->m_IndexDimension != D3D10_SB_OPERAND_INDEX_0D)
    {
        const UINT NumDimensions = pOperand->m_IndexDimension;
        // Index representation
        for (UINT i=0; i < NumDimensions; i++)
        {
            ParseIndex(&pOperand->m_Index[i], pOperand->m_IndexType[i]);
        }
    }
}

void CShaderCodeParser::ParseInstruction(CInstruction* pInstruction)
{
    pInstruction->Clear(true);
    CShaderToken* pStart = m_pCurrentToken;
    CShaderToken Token = *m_pCurrentToken++;
    pInstruction->m_OpCode = DECODE_D3D10_SB_OPCODE_TYPE(Token);
pInstruction->m_PreciseMask = DECODE_D3D11_SB_INSTRUCTION_PRECISE_VALUES(Token); pInstruction->m_bSaturate = DECODE_IS_D3D10_SB_INSTRUCTION_SATURATE_ENABLED(Token); UINT InstructionLength = DECODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(Token); pInstruction->m_NumOperands = GetNumInstructionOperands(pInstruction->m_OpCode); BOOL bExtended = DECODE_IS_D3D10_SB_OPCODE_EXTENDED(Token); if( bExtended && ( (pInstruction->m_OpCode == D3D11_SB_OPCODE_DCL_INTERFACE)|| (pInstruction->m_OpCode == D3D11_SB_OPCODE_DCL_FUNCTION_TABLE))) { pInstruction->m_ExtendedOpCodeCount = 1; // these instructions may be longer than can fit in the normal instructionlength field InstructionLength = (UINT)(*m_pCurrentToken++); } else { pInstruction->m_ExtendedOpCodeCount = 0; for(int i = 0; i < (bExtended ? D3D11_SB_MAX_SIMULTANEOUS_EXTENDED_OPCODES : 0); i++) { pInstruction->m_ExtendedOpCodeCount++; CShaderToken ExtToken = *m_pCurrentToken++; bExtended = DECODE_IS_D3D10_SB_OPCODE_EXTENDED(ExtToken); pInstruction->m_OpCodeEx[i] = DECODE_D3D10_SB_EXTENDED_OPCODE_TYPE(ExtToken); switch(pInstruction->m_OpCodeEx[i]) { case D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS: { pInstruction->m_TexelOffset[0] = (INT8)DECODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET(D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_U, ExtToken); pInstruction->m_TexelOffset[1] = (INT8)DECODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET(D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_V, ExtToken); pInstruction->m_TexelOffset[2] = (INT8)DECODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET(D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_W, ExtToken); for(UINT j = 0;j < 3;j++) { if(pInstruction->m_TexelOffset[j] & 0x8) pInstruction->m_TexelOffset[j] |= 0xfffffff0; } break; } break; case D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM: { pInstruction->m_ResourceDimEx = DECODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION(ExtToken); pInstruction->m_ResourceDimStructureStrideEx = DECODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE(ExtToken); } break; case D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE: { for(UINT j = 0; j 
< 4; j++) { pInstruction->m_ResourceReturnTypeEx[j] = DECODE_D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE(ExtToken,j); } } break; } if( !bExtended ) { break; } } } switch (pInstruction->m_OpCode) { case D3D10_SB_OPCODE_CUSTOMDATA: pInstruction->m_PreciseMask = 0; pInstruction->m_bSaturate = false; pInstruction->m_NumOperands = 0; // not bothering to keep custom-data for now. TODO: store pInstruction->m_CustomData.Type = DECODE_D3D10_SB_CUSTOMDATA_CLASS(Token); InstructionLength = *m_pCurrentToken; if (*m_pCurrentToken <2) { InstructionLength = 2; pInstruction->m_CustomData.pData = 0; pInstruction->m_CustomData.DataSizeInBytes = 0; } else { pInstruction->m_CustomData.DataSizeInBytes = (*m_pCurrentToken-2)*4; pInstruction->m_CustomData.pData = malloc((*m_pCurrentToken - 2)*sizeof(UINT)); if( NULL == pInstruction->m_CustomData.pData ) { throw E_OUTOFMEMORY; } memcpy(pInstruction->m_CustomData.pData, m_pCurrentToken+1, (*m_pCurrentToken - 2)*4); switch(pInstruction->m_CustomData.Type) { case D3D11_SB_CUSTOMDATA_SHADER_MESSAGE: { CShaderMessage* pMessage = &pInstruction->m_CustomData.ShaderMessage; UINT Length = pInstruction->m_CustomData.DataSizeInBytes / 4; UINT* pData = (UINT*)pInstruction->m_CustomData.pData; ZeroMemory(pMessage, sizeof(*pMessage)); if (Length < 6) { break; } UINT StrChars = pData[2]; // Add one for the terminator and then round up. UINT StrWords = (StrChars + sizeof(DWORD)) / sizeof(DWORD); UINT NumOperands = pData[3]; UINT OpLength = pData[4]; // Enforce some basic sanity size limits. 
if (OpLength >= 0x10000 || NumOperands >= 0x1000 || StrWords >= 0x10000 || Length < 5 + OpLength + StrWords) { break; } UINT* pOpEnd = &pData[5 + OpLength]; pMessage->pOperands = (COperand*)malloc(NumOperands * sizeof(COperand)); if (!pMessage->pOperands) { throw E_OUTOFMEMORY; } CONST CShaderToken* pOperands = (CShaderToken*)&pData[5]; for (UINT i = 0; i < NumOperands; i++) { if (pOperands >= pOpEnd) { break; } pMessage->pOperands[i].Clear(); pOperands = ParseOperandAt(&pMessage->pOperands[i], pOperands, pOpEnd); } if (pOperands != pOpEnd) { free(pMessage->pOperands); pMessage->pOperands = NULL; break; } // Now that we're sure everything is valid we can // fill in the message info. pMessage->MessageID = (D3D11_SB_SHADER_MESSAGE_ID)pData[0]; pMessage->FormatStyle = (D3D11_SB_SHADER_MESSAGE_FORMAT)pData[1]; pMessage->pFormatString = (PCSTR)pOpEnd; pMessage->NumOperands = NumOperands; break; } } } break; case D3D11_SB_OPCODE_DCL_FUNCTION_BODY: pInstruction->m_FunctionBodyDecl.FunctionBodyNumber = (UINT)(*m_pCurrentToken); m_pCurrentToken++; break; case D3D11_SB_OPCODE_DCL_FUNCTION_TABLE: pInstruction->m_FunctionTableDecl.FunctionTableNumber = (UINT)(*m_pCurrentToken); m_pCurrentToken++; pInstruction->m_FunctionTableDecl.TableLength = (UINT)(*m_pCurrentToken); // opcode // instruction length if extended instruction // table ID // table length // data assert(InstructionLength == (3 + (bExtended?1:0) + pInstruction->m_FunctionTableDecl.TableLength)); pInstruction->m_FunctionTableDecl.pFunctionIdentifiers = (UINT*) malloc(pInstruction->m_FunctionTableDecl.TableLength*sizeof(UINT)); if( NULL == pInstruction->m_FunctionTableDecl.pFunctionIdentifiers ) { throw E_OUTOFMEMORY; } m_pCurrentToken++; memcpy(pInstruction->m_FunctionTableDecl.pFunctionIdentifiers, m_pCurrentToken, pInstruction->m_FunctionTableDecl.TableLength*sizeof(UINT)); break; case D3D11_SB_OPCODE_DCL_INTERFACE: pInstruction->m_InterfaceDecl.bDynamicallyIndexed = DECODE_D3D11_SB_INTERFACE_INDEXED_BIT(Token); 
pInstruction->m_InterfaceDecl.InterfaceNumber = (WORD)(*m_pCurrentToken); m_pCurrentToken++; pInstruction->m_InterfaceDecl.ExpectedTableSize = (UINT)(*m_pCurrentToken); m_pCurrentToken++; // there's a limit of 64k types, so that gives a max length on this table. pInstruction->m_InterfaceDecl.TableLength = DECODE_D3D11_SB_INTERFACE_TABLE_LENGTH(*m_pCurrentToken); // this puts a limit on the size of interface arrays at 64k pInstruction->m_InterfaceDecl.ArrayLength = DECODE_D3D11_SB_INTERFACE_ARRAY_LENGTH(*m_pCurrentToken); // opcode // instruction length if extended instruction // interface ID // table size // num types/array length // data assert(InstructionLength == (4 + (bExtended?1:0) + pInstruction->m_InterfaceDecl.TableLength)); pInstruction->m_InterfaceDecl.pFunctionTableIdentifiers = (UINT*) malloc(pInstruction->m_InterfaceDecl.TableLength*sizeof(UINT)); if( NULL == pInstruction->m_InterfaceDecl.pFunctionTableIdentifiers ) { throw E_OUTOFMEMORY; } m_pCurrentToken++; memcpy(pInstruction->m_InterfaceDecl.pFunctionTableIdentifiers, m_pCurrentToken, pInstruction->m_InterfaceDecl.TableLength*sizeof(UINT)); break; case D3D11_SB_OPCODE_INTERFACE_CALL: pInstruction->m_InterfaceCall.FunctionIndex = *m_pCurrentToken++; pInstruction->m_InterfaceCall.pInterfaceOperand = pInstruction->m_Operands; ParseOperand(pInstruction->m_InterfaceCall.pInterfaceOperand); break; case D3D10_SB_OPCODE_DCL_RESOURCE: pInstruction->m_ResourceDecl.Dimension = DECODE_D3D10_SB_RESOURCE_DIMENSION(Token); ParseOperand(&pInstruction->m_Operands[0]); pInstruction->m_ResourceDecl.ReturnType[0] = DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(*m_pCurrentToken, 0); pInstruction->m_ResourceDecl.ReturnType[1] = DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(*m_pCurrentToken, 1); pInstruction->m_ResourceDecl.ReturnType[2] = DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(*m_pCurrentToken, 2); pInstruction->m_ResourceDecl.ReturnType[3] = DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(*m_pCurrentToken, 3); 
pInstruction->m_ResourceDecl.SampleCount = DECODE_D3D10_SB_RESOURCE_SAMPLE_COUNT(Token); m_pCurrentToken++; break; case D3D10_SB_OPCODE_DCL_SAMPLER: pInstruction->m_SamplerDecl.SamplerMode = DECODE_D3D10_SB_SAMPLER_MODE(Token); ParseOperand(&pInstruction->m_Operands[0]); break; case D3D11_SB_OPCODE_DCL_STREAM: ParseOperand(&pInstruction->m_Operands[0]); break; case D3D10_SB_OPCODE_DCL_TEMPS: pInstruction->m_TempsDecl.NumTemps = (UINT)(*m_pCurrentToken); m_pCurrentToken++; break; case D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP: pInstruction->m_IndexableTempDecl.IndexableTempNumber = (UINT)(*m_pCurrentToken); m_pCurrentToken++; pInstruction->m_IndexableTempDecl.NumRegisters = (UINT)(*m_pCurrentToken); m_pCurrentToken++; switch( min( 4u, max( 1u, (UINT)(*m_pCurrentToken) ) ) ) { case 1: pInstruction->m_IndexableTempDecl.Mask = D3D10_SB_OPERAND_4_COMPONENT_MASK_X; break; case 2: pInstruction->m_IndexableTempDecl.Mask = D3D10_SB_OPERAND_4_COMPONENT_MASK_X | D3D10_SB_OPERAND_4_COMPONENT_MASK_Y; break; case 3: pInstruction->m_IndexableTempDecl.Mask = D3D10_SB_OPERAND_4_COMPONENT_MASK_X | D3D10_SB_OPERAND_4_COMPONENT_MASK_Y | D3D10_SB_OPERAND_4_COMPONENT_MASK_Z; break; case 4: pInstruction->m_IndexableTempDecl.Mask = D3D10_SB_OPERAND_4_COMPONENT_MASK_ALL; break; } m_pCurrentToken++; break; case D3D10_SB_OPCODE_DCL_INPUT: case D3D10_SB_OPCODE_DCL_OUTPUT: ParseOperand(&pInstruction->m_Operands[0]); break; case D3D10_SB_OPCODE_DCL_INPUT_SIV: ParseOperand(&pInstruction->m_Operands[0]); pInstruction->m_InputDeclSIV.Name = DECODE_D3D10_SB_NAME(*m_pCurrentToken); m_pCurrentToken++; break; case D3D10_SB_OPCODE_DCL_INPUT_SGV: ParseOperand(&pInstruction->m_Operands[0]); pInstruction->m_InputDeclSIV.Name = DECODE_D3D10_SB_NAME(*m_pCurrentToken); m_pCurrentToken++; break; case D3D10_SB_OPCODE_DCL_INPUT_PS: pInstruction->m_InputPSDecl.InterpolationMode = DECODE_D3D10_SB_INPUT_INTERPOLATION_MODE(Token); ParseOperand(&pInstruction->m_Operands[0]); break; case D3D10_SB_OPCODE_DCL_INPUT_PS_SIV: 
pInstruction->m_InputPSDeclSIV.InterpolationMode = DECODE_D3D10_SB_INPUT_INTERPOLATION_MODE(Token); ParseOperand(&pInstruction->m_Operands[0]); pInstruction->m_InputPSDeclSIV.Name = DECODE_D3D10_SB_NAME(*m_pCurrentToken); m_pCurrentToken++; break; case D3D10_SB_OPCODE_DCL_INPUT_PS_SGV: pInstruction->m_InputPSDeclSGV.InterpolationMode = DECODE_D3D10_SB_INPUT_INTERPOLATION_MODE(Token); ParseOperand(&pInstruction->m_Operands[0]); pInstruction->m_InputPSDeclSGV.Name = DECODE_D3D10_SB_NAME(*m_pCurrentToken); m_pCurrentToken++; break; case D3D10_SB_OPCODE_DCL_OUTPUT_SIV: ParseOperand(&pInstruction->m_Operands[0]); pInstruction->m_OutputDeclSIV.Name = DECODE_D3D10_SB_NAME(*m_pCurrentToken); m_pCurrentToken++; break; case D3D10_SB_OPCODE_DCL_OUTPUT_SGV: ParseOperand(&pInstruction->m_Operands[0]); pInstruction->m_OutputDeclSGV.Name = DECODE_D3D10_SB_NAME(*m_pCurrentToken); m_pCurrentToken++; break; case D3D10_SB_OPCODE_DCL_INDEX_RANGE: ParseOperand(&pInstruction->m_Operands[0]); pInstruction->m_IndexRangeDecl.RegCount = (UINT)(*m_pCurrentToken); m_pCurrentToken++; break; case D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER: pInstruction->m_ConstantBufferDecl.AccessPattern = DECODE_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(Token); ParseOperand(&pInstruction->m_Operands[0]); break; case D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: pInstruction->m_OutputTopologyDecl.Topology = DECODE_D3D10_SB_GS_OUTPUT_PRIMITIVE_TOPOLOGY(Token); break; case D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE: pInstruction->m_InputPrimitiveDecl.Primitive = DECODE_D3D10_SB_GS_INPUT_PRIMITIVE(Token); break; case D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: pInstruction->m_GSMaxOutputVertexCountDecl.MaxOutputVertexCount = (UINT)(*m_pCurrentToken); m_pCurrentToken++; break; case D3D11_SB_OPCODE_DCL_GS_INSTANCE_COUNT: pInstruction->m_GSInstanceCountDecl.InstanceCount = (UINT)(*m_pCurrentToken); m_pCurrentToken++; break; case D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS: pInstruction->m_GlobalFlagsDecl.Flags = 
DECODE_D3D10_SB_GLOBAL_FLAGS(Token); break; case D3D11_SB_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: pInstruction->m_InputControlPointCountDecl.InputControlPointCount = DECODE_D3D11_SB_INPUT_CONTROL_POINT_COUNT(Token); break; case D3D11_SB_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: pInstruction->m_OutputControlPointCountDecl.OutputControlPointCount = DECODE_D3D11_SB_OUTPUT_CONTROL_POINT_COUNT(Token); break; case D3D11_SB_OPCODE_DCL_TESS_DOMAIN: pInstruction->m_TessellatorDomainDecl.TessellatorDomain = DECODE_D3D11_SB_TESS_DOMAIN(Token); break; case D3D11_SB_OPCODE_DCL_TESS_PARTITIONING: pInstruction->m_TessellatorPartitioningDecl.TessellatorPartitioning = DECODE_D3D11_SB_TESS_PARTITIONING(Token); break; case D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: pInstruction->m_TessellatorOutputPrimitiveDecl.TessellatorOutputPrimitive = DECODE_D3D11_SB_TESS_OUTPUT_PRIMITIVE(Token); break; case D3D11_SB_OPCODE_DCL_HS_MAX_TESSFACTOR: pInstruction->m_HSMaxTessFactorDecl.MaxTessFactor = *(float*)m_pCurrentToken; m_pCurrentToken++; break; case D3D11_SB_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: pInstruction->m_HSForkPhaseInstanceCountDecl.InstanceCount = *(UINT*)m_pCurrentToken; m_pCurrentToken++; break; case D3D11_SB_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: pInstruction->m_HSJoinPhaseInstanceCountDecl.InstanceCount = *(UINT*)m_pCurrentToken; m_pCurrentToken++; break; case D3D11_SB_OPCODE_DCL_THREAD_GROUP: pInstruction->m_ThreadGroupDecl.x = *(UINT*)m_pCurrentToken++; pInstruction->m_ThreadGroupDecl.y = *(UINT*)m_pCurrentToken++; pInstruction->m_ThreadGroupDecl.z = *(UINT*)m_pCurrentToken++; break; case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: pInstruction->m_TypedUAVDecl.Dimension = DECODE_D3D10_SB_RESOURCE_DIMENSION(Token); pInstruction->m_TypedUAVDecl.Flags = DECODE_D3D11_SB_ACCESS_COHERENCY_FLAGS(Token); ParseOperand(&pInstruction->m_Operands[0]); pInstruction->m_TypedUAVDecl.ReturnType[0] = DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(*m_pCurrentToken, 0); 
pInstruction->m_TypedUAVDecl.ReturnType[1] = DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(*m_pCurrentToken, 1); pInstruction->m_TypedUAVDecl.ReturnType[2] = DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(*m_pCurrentToken, 2); pInstruction->m_TypedUAVDecl.ReturnType[3] = DECODE_D3D10_SB_RESOURCE_RETURN_TYPE(*m_pCurrentToken, 3); break; case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: pInstruction->m_RawUAVDecl.Flags = DECODE_D3D11_SB_ACCESS_COHERENCY_FLAGS(Token); ParseOperand(&pInstruction->m_Operands[0]); break; case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: pInstruction->m_StructuredUAVDecl.Flags = DECODE_D3D11_SB_ACCESS_COHERENCY_FLAGS(Token) | DECODE_D3D11_SB_UAV_FLAGS(Token); ParseOperand(&pInstruction->m_Operands[0]); pInstruction->m_StructuredUAVDecl.ByteStride = *(UINT*)m_pCurrentToken++; break; case D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: ParseOperand(&pInstruction->m_Operands[0]); pInstruction->m_RawTGSMDecl.ByteCount = *(UINT*)m_pCurrentToken++; break; case D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: ParseOperand(&pInstruction->m_Operands[0]); pInstruction->m_StructuredTGSMDecl.StructByteStride = *(UINT*)m_pCurrentToken++; pInstruction->m_StructuredTGSMDecl.StructCount = *(UINT*)m_pCurrentToken++; break; case D3D11_SB_OPCODE_DCL_RESOURCE_RAW: ParseOperand(&pInstruction->m_Operands[0]); break; case D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED: ParseOperand(&pInstruction->m_Operands[0]); pInstruction->m_StructuredSRVDecl.ByteStride = *(UINT*)m_pCurrentToken++; break; case D3D11_SB_OPCODE_SYNC: { DWORD flags = DECODE_D3D11_SB_SYNC_FLAGS(Token); pInstruction->m_SyncFlags.bThreadsInGroup = (flags & D3D11_SB_SYNC_THREADS_IN_GROUP) ? true : false; pInstruction->m_SyncFlags.bThreadGroupSharedMemory = (flags & D3D11_SB_SYNC_THREAD_GROUP_SHARED_MEMORY) ? true : false; pInstruction->m_SyncFlags.bUnorderedAccessViewMemoryGroup = (flags & D3D11_SB_SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP ) ? 
true : false; pInstruction->m_SyncFlags.bUnorderedAccessViewMemoryGlobal = (flags & D3D11_SB_SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL ) ? true : false; } break; case D3D10_SB_OPCODE_RESINFO: pInstruction->m_ResInfoReturnType = DECODE_D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE(Token); ParseOperand(&pInstruction->m_Operands[0]); ParseOperand(&pInstruction->m_Operands[1]); ParseOperand(&pInstruction->m_Operands[2]); break; case D3D10_1_SB_OPCODE_SAMPLE_INFO: pInstruction->m_InstructionReturnType = DECODE_D3D10_SB_INSTRUCTION_RETURN_TYPE(Token); ParseOperand(&pInstruction->m_Operands[0]); ParseOperand(&pInstruction->m_Operands[1]); break; case D3D10_SB_OPCODE_IF: case D3D10_SB_OPCODE_BREAKC: case D3D10_SB_OPCODE_CONTINUEC: case D3D10_SB_OPCODE_RETC: case D3D10_SB_OPCODE_DISCARD: pInstruction->SetTest(DECODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(Token)); ParseOperand(&pInstruction->m_Operands[0]); break; case D3D10_SB_OPCODE_CALLC: pInstruction->SetTest(DECODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(Token)); ParseOperand(&pInstruction->m_Operands[0]); ParseOperand(&pInstruction->m_Operands[1]); break; default: { for (UINT i=0; i < pInstruction->m_NumOperands; i++) { ParseOperand(&pInstruction->m_Operands[i]); } break; } } m_pCurrentToken = pStart + InstructionLength; } // **************************************************************************** // // class CShaderAsm // // **************************************************************************** void CShaderAsm::EmitOperand(const COperandBase& operand) { CShaderToken Token = ENCODE_D3D10_SB_OPERAND_TYPE(operand.m_Type) | ENCODE_D3D10_SB_OPERAND_NUM_COMPONENTS(operand.m_NumComponents) | ENCODE_D3D10_SB_OPERAND_EXTENDED(operand.m_bExtendedOperand); BOOL bProcessOperandIndices = FALSE; if (!(operand.m_Type == D3D10_SB_OPERAND_TYPE_IMMEDIATE32 || operand.m_Type == D3D10_SB_OPERAND_TYPE_IMMEDIATE64)) { Token |= ENCODE_D3D10_SB_OPERAND_INDEX_DIMENSION(operand.m_IndexDimension); if (operand.m_NumComponents == 
D3D10_SB_OPERAND_4_COMPONENT) { // Component selection mode Token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE(operand.m_ComponentSelection); switch(operand.m_ComponentSelection) { case D3D10_SB_OPERAND_4_COMPONENT_MASK_MODE: Token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_MASK(operand.m_WriteMask ); break; case D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE_MODE: Token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SWIZZLE(operand.m_Swizzle[0], operand.m_Swizzle[1], operand.m_Swizzle[2], operand.m_Swizzle[3]); break; case D3D10_SB_OPERAND_4_COMPONENT_SELECT_1_MODE: { Token |= ENCODE_D3D10_SB_OPERAND_4_COMPONENT_SELECT_1(operand.m_ComponentName); break; } default: throw E_FAIL; } } UINT NumDimensions = operand.m_IndexDimension; if (NumDimensions > 0) { bProcessOperandIndices = TRUE; // Encode index representation for (UINT i=0; i < NumDimensions; i++) { Token |= ENCODE_D3D10_SB_OPERAND_INDEX_REPRESENTATION(i, operand.m_IndexType[i]); } } FUNC(Token); } // Extended operand if (operand.m_bExtendedOperand) { Token = ENCODE_D3D10_SB_EXTENDED_OPERAND_TYPE(operand.m_ExtendedOperandType); if (operand.m_ExtendedOperandType == D3D10_SB_EXTENDED_OPERAND_MODIFIER) { Token |= ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(operand.m_Modifier); Token |= ENCODE_D3D11_SB_OPERAND_MIN_PRECISION(operand.m_MinPrecision); } FUNC(Token); } if( operand.m_Type == D3D10_SB_OPERAND_TYPE_IMMEDIATE32 || operand.m_Type == D3D10_SB_OPERAND_TYPE_IMMEDIATE64) { FUNC(Token); UINT n = 0; if (operand.m_NumComponents == D3D10_SB_OPERAND_4_COMPONENT) n = 4; else if (operand.m_NumComponents == D3D10_SB_OPERAND_1_COMPONENT) n = 1; else { throw E_FAIL; } for (UINT i=0 ; i < n; i++) { FUNC(operand.m_Value[i]); } } // Operand indices if (bProcessOperandIndices) { const UINT NumDimensions = operand.m_IndexDimension; // Encode index representation for (UINT i=0; i < NumDimensions; i++) { switch (operand.m_IndexType[i]) { case D3D10_SB_OPERAND_INDEX_IMMEDIATE32: FUNC(operand.m_Index[i].m_RegIndex); break; case 
D3D10_SB_OPERAND_INDEX_IMMEDIATE64: FUNC(operand.m_Index[i].m_RegIndexA[0]); FUNC(operand.m_Index[i].m_RegIndexA[1]); break; case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: FUNC(operand.m_Index[i].m_RegIndex); // Fall through case D3D10_SB_OPERAND_INDEX_RELATIVE: { D3D10_SB_OPERAND_TYPE RelRegType = operand.m_Index[i].m_RelRegType; if( operand.m_Index[i].m_IndexDimension == D3D10_SB_OPERAND_INDEX_2D ) { EmitOperand(COperand2D(RelRegType, operand.m_Index[i].m_RelIndex, operand.m_Index[i].m_RelIndex1, operand.m_Index[i].m_ComponentName, operand.m_Index[i].m_MinPrecision)); } else { EmitOperand(COperand4(RelRegType, operand.m_Index[i].m_RelIndex, operand.m_Index[i].m_ComponentName, operand.m_Index[i].m_MinPrecision)); } } break; default: throw E_FAIL; } } } } //----------------------------------------------------------------------------- void CShaderAsm::EmitInstruction(const CInstruction& instruction) { UINT OpCode; if(instruction.m_OpCode == D3D10_SB_OPCODE_CUSTOMDATA) { OPCODE(D3D10_SB_OPCODE_CUSTOMDATA); FUNC(instruction.m_CustomData.DataSizeInBytes/4 + 2); for(UINT i = 0;i < instruction.m_CustomData.DataSizeInBytes/4; i++) FUNC(((UINT*)instruction.m_CustomData.pData)[i]); ENDINSTRUCTION(); return; } OpCode = ENCODE_D3D10_SB_OPCODE_TYPE(instruction.m_OpCode) | ENCODE_D3D10_SB_OPCODE_EXTENDED(instruction.m_ExtendedOpCodeCount > 0 ? 
true : false); switch (instruction.m_OpCode) { case D3D10_SB_OPCODE_IF: case D3D10_SB_OPCODE_BREAKC: case D3D10_SB_OPCODE_CALLC: case D3D10_SB_OPCODE_CONTINUEC: case D3D10_SB_OPCODE_RETC: case D3D10_SB_OPCODE_DISCARD: OpCode |= ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(instruction.Test()); break; case D3D10_SB_OPCODE_RESINFO: OpCode |= ENCODE_D3D10_SB_RESINFO_INSTRUCTION_RETURN_TYPE(instruction.m_ResInfoReturnType); break; case D3D10_1_SB_OPCODE_SAMPLE_INFO: OpCode |= ENCODE_D3D10_SB_INSTRUCTION_RETURN_TYPE(instruction.m_InstructionReturnType); break; case D3D11_SB_OPCODE_SYNC: OpCode |= ENCODE_D3D11_SB_SYNC_FLAGS( ( instruction.m_SyncFlags.bThreadsInGroup ? D3D11_SB_SYNC_THREADS_IN_GROUP : 0 ) | ( instruction.m_SyncFlags.bThreadGroupSharedMemory ? D3D11_SB_SYNC_THREAD_GROUP_SHARED_MEMORY : 0 ) | ( instruction.m_SyncFlags.bUnorderedAccessViewMemoryGlobal ? D3D11_SB_SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL : 0 ) | ( instruction.m_SyncFlags.bUnorderedAccessViewMemoryGroup ? D3D11_SB_SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP : 0 ) ); break; }; OpCode |= ENCODE_D3D10_SB_INSTRUCTION_SATURATE(instruction.m_bSaturate); OpCode |= ENCODE_D3D11_SB_INSTRUCTION_PRECISE_VALUES(instruction.m_PreciseMask); OPCODE(OpCode); for(UINT i = 0; i < min(instruction.m_ExtendedOpCodeCount,D3D11_SB_MAX_SIMULTANEOUS_EXTENDED_OPCODES); i++) { UINT Extended = ENCODE_D3D10_SB_EXTENDED_OPCODE_TYPE(instruction.m_OpCodeEx[i]); switch( instruction.m_OpCodeEx[i] ) { case D3D10_SB_EXTENDED_OPCODE_SAMPLE_CONTROLS: { Extended |= ENCODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET(D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_U, instruction.m_TexelOffset[0]); Extended |= ENCODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET(D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_V, instruction.m_TexelOffset[1]); Extended |= ENCODE_IMMEDIATE_D3D10_SB_ADDRESS_OFFSET(D3D10_SB_IMMEDIATE_ADDRESS_OFFSET_W, instruction.m_TexelOffset[2]); } break; case D3D11_SB_EXTENDED_OPCODE_RESOURCE_DIM: { Extended |= 
ENCODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION(instruction.m_ResourceDimEx) | ENCODE_D3D11_SB_EXTENDED_RESOURCE_DIMENSION_STRUCTURE_STRIDE(instruction.m_ResourceDimStructureStrideEx); } break; case D3D11_SB_EXTENDED_OPCODE_RESOURCE_RETURN_TYPE: { for(UINT j = 0; j < 4; j++) { Extended |= ENCODE_D3D11_SB_EXTENDED_RESOURCE_RETURN_TYPE(instruction.m_ResourceReturnTypeEx[j],j); } } break; } Extended |= ENCODE_D3D10_SB_OPCODE_EXTENDED((i + 1 < instruction.m_ExtendedOpCodeCount) ? true : false); FUNC(Extended); } for (UINT i=0; i < instruction.m_NumOperands; i++) { EmitOperand(instruction.m_Operands[i]); } ENDINSTRUCTION(); } //***************************************************************************** // // CInstruction // //***************************************************************************** BOOL CInstruction::Disassemble( __out_ecount(StringSize) LPSTR pString, UINT StringSize) { StringCchCopy(pString, StringSize, g_InstructionInfo[m_OpCode].m_Name); return TRUE; } }; // name space D3D10ShaderBinary // End of file : ShaderBinary.cpp ================================================ FILE: src/ShaderParser.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#pragma once #include "pch.h" #include #include namespace D3D12TranslationLayer { Shader::Shader(ImmediateContext* pParent, std::unique_ptr byteCode, SIZE_T bytecodeSize) : DeviceChild(pParent) , m_ByteCode(std::move(byteCode)) , m_Desc({ m_ByteCode.get(), bytecodeSize }) { Init(); } Shader::Shader(ImmediateContext* pParent, const void* byteCode, SIZE_T bytecodeSize) : DeviceChild(pParent) , m_Desc({ byteCode, bytecodeSize }) { Init(); } void Shader::Init() { CDXBCParser DXBCParser; if (FAILED(DXBCParser.ReadDXBCAssumingValidSize(m_Desc.pShaderBytecode))) { return; } UINT BlobIndex = DXBCParser.FindNextMatchingBlob(DXBC_GenericShaderEx, 0); if (DXBC_BLOB_NOT_FOUND == BlobIndex) { BlobIndex = DXBCParser.FindNextMatchingBlob(DXBC_GenericShader, 0); } if (DXBC_BLOB_NOT_FOUND == BlobIndex) { return; } const UINT* pDriverBytecode = (const UINT*)DXBCParser.GetBlob(BlobIndex); Parse(pDriverBytecode); } void SShaderDecls::Parse(UINT const* pDriverBytecode) { D3D10ShaderBinary::CShaderCodeParser Parser(pDriverBytecode); UINT declSlot = 0; bool bDone = false; while (!Parser.EndOfShader() && !bDone) { D3D10ShaderBinary::CInstruction Instruction; Parser.ParseInstruction(&Instruction); switch (Instruction.m_OpCode) { case D3D10_SB_OPCODE_DCL_RESOURCE: case D3D11_SB_OPCODE_DCL_RESOURCE_RAW: case D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED: declSlot = Instruction.Operand(0).RegIndex(); if (declSlot >= m_ResourceDecls.size()) { m_ResourceDecls.resize(declSlot + 1, RESOURCE_DIMENSION::UNKNOWN); // throw( bad_alloc ) } break; case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: declSlot = Instruction.Operand(0).RegIndex(); if (declSlot >= m_UAVDecls.size()) { m_UAVDecls.resize(declSlot + 1, RESOURCE_DIMENSION::UNKNOWN); // throw( bad_alloc ) } break; case D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER: m_NumCBs = max(m_NumCBs, Instruction.Operand(0).RegIndex() + 1); break; case 
D3D10_SB_OPCODE_DCL_SAMPLER: m_NumSamplers = max(m_NumSamplers, Instruction.Operand(0).RegIndex() + 1); break; case D3D11_SB_OPCODE_DCL_STREAM: { UINT StreamIndex = Instruction.Operand(0).RegIndex(); m_OutputStreamMask |= (1 << StreamIndex); } break; } switch (Instruction.m_OpCode) { case D3D10_SB_OPCODE_DCL_RESOURCE: m_ResourceDecls[declSlot] = (RESOURCE_DIMENSION)Instruction.m_ResourceDecl.Dimension; break; case D3D11_SB_OPCODE_DCL_RESOURCE_RAW: m_ResourceDecls[declSlot] = RESOURCE_DIMENSION::BUFFER; break; case D3D11_SB_OPCODE_DCL_RESOURCE_STRUCTURED: m_ResourceDecls[declSlot] = RESOURCE_DIMENSION::BUFFER; break; case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: m_UAVDecls[declSlot] = (RESOURCE_DIMENSION)Instruction.m_TypedUAVDecl.Dimension; break; case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: m_UAVDecls[declSlot] = RESOURCE_DIMENSION::BUFFER; break; case D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: m_UAVDecls[declSlot] = RESOURCE_DIMENSION::BUFFER; break; case D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER: case D3D10_SB_OPCODE_DCL_SAMPLER: case D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE: case D3D10_SB_OPCODE_DCL_TEMPS: case D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP: case D3D10_SB_OPCODE_DCL_INPUT: case D3D10_SB_OPCODE_DCL_INPUT_SIV: case D3D10_SB_OPCODE_DCL_INPUT_SGV: case D3D10_SB_OPCODE_DCL_INPUT_PS: case D3D10_SB_OPCODE_DCL_INPUT_PS_SIV: case D3D10_SB_OPCODE_DCL_INPUT_PS_SGV: case D3D10_SB_OPCODE_DCL_OUTPUT: case D3D10_SB_OPCODE_DCL_OUTPUT_SGV: case D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: case D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: case D3D11_SB_OPCODE_DCL_GS_INSTANCE_COUNT: case D3D10_SB_OPCODE_DCL_INDEX_RANGE: case D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS: case D3D11_SB_OPCODE_HS_DECLS: case D3D11_SB_OPCODE_HS_CONTROL_POINT_PHASE: case D3D11_SB_OPCODE_HS_FORK_PHASE: case D3D11_SB_OPCODE_HS_JOIN_PHASE: case D3D11_SB_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: case D3D11_SB_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: case D3D11_SB_OPCODE_DCL_TESS_DOMAIN: case 
D3D11_SB_OPCODE_DCL_TESS_PARTITIONING: case D3D11_SB_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: case D3D11_SB_OPCODE_DCL_HS_MAX_TESSFACTOR: case D3D11_SB_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: case D3D11_SB_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: case D3D11_SB_OPCODE_DCL_FUNCTION_BODY: case D3D11_SB_OPCODE_DCL_FUNCTION_TABLE: case D3D11_SB_OPCODE_DCL_INTERFACE: case D3D10_SB_OPCODE_CUSTOMDATA: case D3D11_SB_OPCODE_DCL_THREAD_GROUP: case D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: case D3D11_SB_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: case D3D10_SB_OPCODE_DCL_OUTPUT_SIV: case D3D11_SB_OPCODE_DCL_STREAM: break; default: // Stop compilation at the first non-declaration instruction bDone = true; break; } } // From the DX11.1 spec: // "If no streams are declared, output and // output topology declarations are assumed to be // for stream 0." if (0 == m_OutputStreamMask) { m_OutputStreamMask = 1; } } }; ================================================ FILE: src/SharedResourceHelpers.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #include "pch.h" #include "SharedResourceHelpers.hpp" /* SharedResourceLocalHandle is an off-by-one index into the m_OpenResourceMap vector. The resource at index n has a handle value of n+1. 
*/

namespace D3D12TranslationLayer
{
    // Returns true when `handle` (a 1-based slot number) refers to a slot that
    // exists in a map holding `mapSize` entries. Handle 0 is never valid.
    static bool IsLocalHandleInRange(SharedResourceLocalHandle handle, size_t mapSize)
    {
        return handle != 0 && static_cast<size_t>(handle) <= mapSize;
    }

    // Opens the shared resource behind resourceHandle on the D3D12 device and
    // stores it in the open-resource map. Returns the local handle for the new
    // entry. Throws (via ThrowFailure) if OpenSharedHandle fails;
    // GetHandleForResource may throw bad_alloc.
    SharedResourceLocalHandle TRANSLATION_API SharedResourceHelpers::CreateKMTHandle(_In_ HANDLE resourceHandle)
    {
        // NOTE(review): CComPtr's template argument is not visible in this
        // copy of the file; the declaration is reproduced as found.
        CComPtr spResource;
        ThrowFailure(m_ImmCtx.m_pDevice12->OpenSharedHandle(resourceHandle, IID_PPV_ARGS(&spResource)));

        UINT handle = GetHandleForResource(spResource);

        // No more exceptions
        // The map now holds the raw pointer, so the smart pointer gives up its
        // reference instead of releasing it.
        spResource.Detach();
        return handle;
    }

    // Stores an already-opened resource in the open-resource map and returns
    // its local handle. No AddRef is performed here.
    SharedResourceLocalHandle TRANSLATION_API SharedResourceHelpers::CreateKMTHandle(_In_ IUnknown* pResource)
    {
        return GetHandleForResource(pResource);
    }

    // Removes the entry for `handle` from the map and returns the stored
    // pointer without releasing it. Returns nullptr (after asserting) when the
    // handle is 0 or beyond the end of the map, instead of indexing out of
    // bounds.
    IUnknown* TRANSLATION_API SharedResourceHelpers::DetachKMTHandle(SharedResourceLocalHandle handle)
    {
        std::lock_guard lock(m_OpenResourceMapLock);

        // Called by C11On12 after a successful resource open
        const bool bInRange = IsLocalHandleInRange(handle, m_OpenResourceMap.size());
        assert(bInRange);
        if (!bInRange)
        {
            return nullptr;
        }

        handle--;
        assert(m_OpenResourceMap[handle] != nullptr);
        IUnknown* pUnknown = m_OpenResourceMap[handle];
        m_OpenResourceMap[handle] = nullptr;
        return pUnknown;
    }

    // Releases the resource stored for `handle`, if any, and clears the slot.
    // A handle of 0 or one beyond the end of the map is ignored.
    void TRANSLATION_API SharedResourceHelpers::DestroyKMTHandle(SharedResourceLocalHandle handle)
    {
        std::lock_guard lock(m_OpenResourceMapLock);

        // This method is called by the core layer in cases of failure during OpenSharedResource
        // The KM handle for a shared resource is simply an index into this
        // vector, and "destroying" the KM handle should release the underlying resource.
        if (!IsLocalHandleInRange(handle, m_OpenResourceMap.size()))
        {
            return;
        }

        handle--;
        if (m_OpenResourceMap[handle])
        {
            m_OpenResourceMap[handle]->Release();
            m_OpenResourceMap[handle] = nullptr;
        }
    }

    // Places pResource in the first empty slot of the map, or appends a new
    // slot when none is empty, and returns slot index + 1 as the handle.
    SharedResourceLocalHandle SharedResourceHelpers::GetHandleForResource(_In_ IUnknown* pResource)
    {
        std::lock_guard lock(m_OpenResourceMapLock);

        // The KM handle for a shared resource is simply an index into this
        // vector, to allow the core layer to clean up an 11on12 resource the same way as an 11 resource
        assert(m_OpenResourceMap.size() < MAXUINT);
        const size_t slotCount = m_OpenResourceMap.size();
        UINT i = 0;
        for (; i < slotCount; ++i)
        {
            if (m_OpenResourceMap[i] == nullptr)
            {
                m_OpenResourceMap[i] = pResource;

                // It's now safe to detach the smart pointer
                return i+1;
            }
        }

        m_OpenResourceMap.push_back(pResource); // throw( bad_alloc )

        // It's now safe to detach the smart pointer
        return i+1;
    }

    // Stores references to the immediate context and the creation flags.
    SharedResourceHelpers::SharedResourceHelpers(ImmediateContext& ImmCtx, CreationFlags const& Flags) noexcept
        : m_ImmCtx(ImmCtx)
        , m_CreationFlags(Flags)
    {
    }

    // Writes a SOpenResourcePrivateData built from destructionType into the
    // caller's buffer. Throws (via ThrowFailure(E_INVALIDARG)) unless dataSize
    // equals cPrivateResourceDriverDataSize.
    void TRANSLATION_API SharedResourceHelpers::InitializePrivateDriverData(
        DeferredDestructionType destructionType,
        _Out_writes_bytes_(dataSize) void* pResourcePrivateDriverData,
        _In_ UINT dataSize)
    {
        if (dataSize != cPrivateResourceDriverDataSize)
        {
            ThrowFailure(E_INVALIDARG);
        }
        __analysis_assume(dataSize == cPrivateResourceDriverDataSize);

        SOpenResourcePrivateData* pPrivateData = (SOpenResourcePrivateData*)pResourcePrivateDriverData;
        *pPrivateData = SOpenResourcePrivateData(destructionType);
    }

    // Returns the resource stored for `handle` and leaves the map unchanged.
    // Returns nullptr (after asserting) when the handle is 0 or beyond the end
    // of the map, instead of indexing out of bounds.
    IUnknown* TRANSLATION_API SharedResourceHelpers::QueryResourceFromKMTHandle(SharedResourceLocalHandle handle)
    {
        std::lock_guard lock(m_OpenResourceMapLock);

        const bool bInRange = IsLocalHandleInRange(handle, m_OpenResourceMap.size());
        assert(bInRange);
        if (!bInRange)
        {
            return nullptr;
        }

        handle--;
        assert(m_OpenResourceMap[handle] != nullptr);
        return m_OpenResourceMap[handle];
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Derives the set of D3D11 bind flags a D3D12 resource could be used with.
    //   - Buffers get constant/index/vertex buffer and stream-output.
    //   - 2D textures on a shared, displayable heap (when displayable textures
    //     are supported) get decoder and video encoder.
    //   - Otherwise, when the heap is not displayable, decoder / video encoder
    //     follow the format's Support1 bits.
    // Depth-stencil, render-target and unordered-access follow Desc.Flags;
    // shader-resource is added unless DENY_SHADER_RESOURCE is set.
    UINT ConvertPossibleBindFlags(D3D12_RESOURCE_DESC& Desc, D3D12_HEAP_FLAGS HeapFlags, const D3D12_FEATURE_DATA_FORMAT_SUPPORT& formatSupport, bool SupportDisplayableTextures)
    {
        UINT BindFlags = 0;
        if (Desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
        {
            BindFlags |= D3D11_BIND_CONSTANT_BUFFER | D3D11_BIND_INDEX_BUFFER | D3D11_BIND_VERTEX_BUFFER | D3D11_BIND_STREAM_OUTPUT;
        }
        else if (Desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D &&
                 SupportDisplayableTextures &&
                 (HeapFlags & D3D12_HEAP_FLAG_SHARED) &&
                 (HeapFlags & D3D12_HEAP_FLAG_ALLOW_DISPLAY) )
        {
            BindFlags |= D3D11_BIND_DECODER | D3D11_BIND_VIDEO_ENCODER;
        }
        else if ((HeapFlags & D3D12_HEAP_FLAG_ALLOW_DISPLAY) == 0)
        {
            BindFlags |= (formatSupport.Support1 & D3D12_FORMAT_SUPPORT1_DECODER_OUTPUT) ? D3D11_BIND_DECODER : 0u;
            BindFlags |= (formatSupport.Support1 & D3D12_FORMAT_SUPPORT1_VIDEO_ENCODER) ? D3D11_BIND_VIDEO_ENCODER : 0u;
        }
        if (Desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)
            BindFlags |= D3D11_BIND_DEPTH_STENCIL;
        if (Desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)
            BindFlags |= D3D11_BIND_RENDER_TARGET;
        if (Desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS)
            BindFlags |= D3D11_BIND_UNORDERED_ACCESS;
        if ((Desc.Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE) == 0)
            BindFlags |= D3D11_BIND_SHADER_RESOURCE;
        return BindFlags;
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    UINT ConvertHeapMiscFlags(D3D12_HEAP_FLAGS HeapFlags, bool bNtHandle, bool bKeyedMutex)
    {
        UINT MiscFlags = 0;
        if (HeapFlags & D3D12_HEAP_FLAG_SHARED)
            MiscFlags |= (bKeyedMutex ? D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX : D3D11_RESOURCE_MISC_SHARED) |
                         (bNtHandle ?
D3D11_RESOURCE_MISC_SHARED_NTHANDLE : 0); return MiscFlags; } //---------------------------------------------------------------------------------------------------------------------------------- UINT ConvertPossibleCPUAccessFlags(D3D12_HEAP_PROPERTIES HeapProps, ID3D12Device* pDevice) { UINT CPUAccessFlags = 0; if (HeapProps.Type != D3D12_HEAP_TYPE_CUSTOM) { HeapProps = pDevice->GetCustomHeapProperties(HeapProps.CreationNodeMask, HeapProps.Type); } switch (HeapProps.CPUPageProperty) { case D3D12_CPU_PAGE_PROPERTY_WRITE_BACK: CPUAccessFlags = D3D11_CPU_ACCESS_WRITE | D3D11_CPU_ACCESS_READ; break; case D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE: CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; break; } return CPUAccessFlags; } void TRANSLATION_API SharedResourceHelpers::QueryResourceInfoFromKMTHandle(UINT handle, _In_opt_ const D3D11_RESOURCE_FLAGS* pOverrideFlags, _Out_ ResourceInfo* pResourceInfo) { if (pResourceInfo == nullptr) { ThrowFailure(E_INVALIDARG); } ZeroMemory(pResourceInfo, sizeof(*pResourceInfo)); CComPtr spUnknown = QueryResourceFromKMTHandle(handle); bool bShared = false; ID3D12Object* pObject = nullptr; // First, check for tile pools CComPtr spHeap; if (SUCCEEDED(spUnknown->QueryInterface(&spHeap))) { // Shared tile pools not supported on tier 1 if (m_ImmCtx.GetCaps().TiledResourcesTier == D3D12_TILED_RESOURCES_TIER_1) { // TODO: Debug spew ThrowFailure(E_INVALIDARG); } pResourceInfo->TiledPool.m_HeapDesc = spHeap->GetDesc(); pResourceInfo->m_Type = TiledPoolType; bShared = (pResourceInfo->TiledPool.m_HeapDesc.Flags & D3D12_HEAP_FLAG_SHARED) != 0; pObject = spHeap; } // Not a tile pool, must be a resource CComPtr spResource; if (SUCCEEDED(spUnknown->QueryInterface(&spResource))) { D3D12_HEAP_FLAGS MiscFlags; pResourceInfo->m_Type = ResourceType; bShared = SUCCEEDED(spResource->GetHeapProperties(nullptr, &MiscFlags)) && (MiscFlags & D3D12_HEAP_FLAG_SHARED) != 0; pObject = spResource; } if (!spResource && !spHeap) { // TODO: Debug spew ThrowFailure(E_INVALIDARG); 
} if (!bShared) { pResourceInfo->m_bAllocatedBy9on12 = false; pResourceInfo->m_bNTHandle = false; pResourceInfo->m_bSynchronized = false; pResourceInfo->m_GDIHandle = 0; } else { D3D12_COMPATIBILITY_SHARED_FLAGS CompatFlags; ThrowFailure(m_ImmCtx.m_pCompatDevice->ReflectSharedProperties( pObject, D3D12_REFELCT_SHARED_PROPERTY_COMPATIBILITY_SHARED_FLAGS, &CompatFlags, sizeof(CompatFlags))); pResourceInfo->m_bNTHandle = (CompatFlags & D3D12_COMPATIBILITY_SHARED_FLAG_NON_NT_HANDLE) == 0; pResourceInfo->m_bSynchronized = (CompatFlags & D3D12_COMPATIBILITY_SHARED_FLAG_KEYED_MUTEX) != 0; pResourceInfo->m_bAllocatedBy9on12 = (CompatFlags & D3D12_COMPATIBILITY_SHARED_FLAG_9_ON_12) != 0; if (pResourceInfo->m_bNTHandle) { pResourceInfo->m_GDIHandle = 0; } else { ThrowFailure(m_ImmCtx.m_pCompatDevice->ReflectSharedProperties( pObject, D3D12_REFLECT_SHARED_PROPERTY_NON_NT_SHARED_HANDLE, &pResourceInfo->m_GDIHandle, sizeof(pResourceInfo->m_GDIHandle))); } } if (spResource) { D3D12_RESOURCE_DESC Desc = spResource->GetDesc(); D3D11_RESOURCE_FLAGS Flags = { }; D3D12_HEAP_FLAGS HeapFlags = D3D12_HEAP_FLAG_NONE; D3D12_HEAP_PROPERTIES HeapProps = { }; if (Desc.Width > UINT_MAX) { // TODO: Debug spew ThrowFailure(E_INVALIDARG); } if (m_CreationFlags.SupportDisplayableTextures && Desc.Format == DXGI_FORMAT_420_OPAQUE ) { // 420_OPAQUE doesn't exist in D3D12. 
Desc.Format = DXGI_FORMAT_NV12; } D3D12_FEATURE_DATA_FORMAT_SUPPORT formatSupport = { Desc.Format }; (void)m_ImmCtx.m_pDevice12->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &formatSupport, sizeof(formatSupport)); if (pOverrideFlags || FAILED(m_ImmCtx.m_pCompatDevice->ReflectSharedProperties( spResource, D3D12_REFLECT_SHARED_PROPERTY_D3D11_RESOURCE_FLAGS, &Flags, sizeof(Flags)))) { // First, determine the full set of capabilities from the resource Flags.MiscFlags = 0; Flags.CPUAccessFlags = 0; Flags.StructureByteStride = 0; if (SUCCEEDED(spResource->GetHeapProperties(&HeapProps, &HeapFlags))) { // Committed or placed Flags.MiscFlags |= ConvertHeapMiscFlags(HeapFlags, pResourceInfo->m_bNTHandle, pResourceInfo->m_bSynchronized); if (m_CreationFlags.SupportDisplayableTextures && (HeapFlags & D3D12_HEAP_FLAG_SHARED) && Desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER ) { if (HeapFlags & D3D12_HEAP_FLAG_ALLOW_DISPLAY) { Flags.MiscFlags |= 0x100000 /*D3D11_RESOURCE_MISC_SHARED_DISPLAYABLE*/; } if (!(Desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS)) { Flags.MiscFlags |= 0x200000 /*D3D11_RESOURCE_MISC_SHARED_EXCLUSIVE_WRITER*/; } } Flags.CPUAccessFlags = ConvertPossibleCPUAccessFlags(HeapProps, m_ImmCtx.m_pDevice12.get()); } else { // Reserved (tiled) Flags.MiscFlags |= D3D11_RESOURCE_MISC_TILED; } Flags.BindFlags = ConvertPossibleBindFlags(Desc, HeapFlags, formatSupport, m_CreationFlags.SupportDisplayableTextures); { CComPtr spProtectedResourceSession; if (SUCCEEDED(spResource->GetProtectedResourceSession(IID_PPV_ARGS(&spProtectedResourceSession)))) { Flags.MiscFlags |= D3D11_RESOURCE_MISC_HW_PROTECTED; } } UINT AssumableBindFlags = (D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET | D3D11_BIND_UNORDERED_ACCESS); UINT AssumableMiscFlags = (D3D11_RESOURCE_MISC_RESTRICT_SHARED_RESOURCE | D3D11_RESOURCE_MISC_SHARED | D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX | D3D11_RESOURCE_MISC_SHARED_NTHANDLE | D3D11_RESOURCE_MISC_TILED | 
D3D11_RESOURCE_MISC_TILE_POOL | D3D11_RESOURCE_MISC_HW_PROTECTED); if (m_CreationFlags.SupportDisplayableTextures) { AssumableBindFlags |= D3D11_BIND_VIDEO_ENCODER | D3D11_BIND_DECODER; // D3D11_RESOURCE_MISC_SHARED_DISPLAYABLE | D3D11_RESOURCE_MISC_SHARED_EXCLUSIVE_WRITER AssumableMiscFlags |= 0x300000; } static const UINT AssumableCPUFlags = 0; if (pOverrideFlags) { // Most misc flags will all be validated by the open shared resource process in the D3D11 core, // since validity of misc flags depend on the final bind/CPU flags, not the possible ones // The only ones we validate here are the ones that are based on heap properties (or lack thereof) if ((pOverrideFlags->MiscFlags & AssumableMiscFlags) & ~Flags.MiscFlags || pOverrideFlags->BindFlags & ~Flags.BindFlags || pOverrideFlags->CPUAccessFlags & ~Flags.CPUAccessFlags) { // TODO: Debug spew ThrowFailure(E_INVALIDARG); } if (pOverrideFlags->StructureByteStride && Desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER) { // TODO: Debug spew ThrowFailure(E_INVALIDARG); } Flags.BindFlags = pOverrideFlags->BindFlags; Flags.CPUAccessFlags = pOverrideFlags->CPUAccessFlags; Flags.StructureByteStride = pOverrideFlags->StructureByteStride; } else { Flags.BindFlags &= AssumableBindFlags; Flags.CPUAccessFlags &= AssumableCPUFlags; assert((Flags.MiscFlags & ~AssumableMiscFlags) == 0); } } #if DBG else { // Ensure the 11 flags are a subset of capabilities implied by the 12 resource desc assert(SUCCEEDED(spResource->GetHeapProperties(&HeapProps, &HeapFlags))); assert((Flags.CPUAccessFlags & ~ConvertPossibleCPUAccessFlags(HeapProps, m_ImmCtx.m_pDevice12.get())) == 0); assert((Flags.BindFlags & ~ConvertPossibleBindFlags(Desc, HeapFlags, formatSupport, m_CreationFlags.SupportDisplayableTextures)) == 0); assert((~Flags.MiscFlags & ConvertHeapMiscFlags(HeapFlags, pResourceInfo->m_bNTHandle, pResourceInfo->m_bSynchronized)) == 0); } #endif pResourceInfo->Resource.m_ResourceDesc = Desc; pResourceInfo->Resource.m_HeapFlags = HeapFlags; 
pResourceInfo->Resource.m_HeapProps = HeapProps; pResourceInfo->Resource.m_Flags11 = Flags; } } unique_comptr TRANSLATION_API SharedResourceHelpers::OpenResourceFromKmtHandle( ResourceCreationArgs& createArgs, _In_ SharedResourceLocalHandle kmtHandle, _In_reads_bytes_(resourcePrivateDriverDataSize) void* pResourcePrivateDriverData, _In_ UINT resourcePrivateDriverDataSize, _In_ D3D12_RESOURCE_STATES currentState) { UNREFERENCED_PARAMETER(resourcePrivateDriverDataSize); IUnknown* pRefToRelease = QueryResourceFromKMTHandle(kmtHandle); assert(resourcePrivateDriverDataSize == sizeof(SOpenResourcePrivateData)); SOpenResourcePrivateData* pPrivateData = reinterpret_cast(pResourcePrivateDriverData); auto spResource = Resource::OpenResource( &m_ImmCtx, createArgs, pRefToRelease, pPrivateData->GetDeferredDestructionType(), currentState); if (pRefToRelease) { pRefToRelease->Release(); } DetachKMTHandle(kmtHandle); return spResource; } } ================================================ FILE: src/SubresourceHelpers.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. #include "pch.h" namespace D3D12TranslationLayer { //================================================================================================================================== // // TRIGGER_CONDITION // // Used to essentially accumulate multiple conditions into one value. For example: during validation checking, one needs to check // to ensure multiple conditions are conceptually false, before it is known that all the parameters are valid. Starting with a // single value (uInvalid), initialized to 0, TRIGGER_CONDITION can be used to accumulate evaluations into such a value. At the // end of usage, (uInvalid) will be non-zero if any of those conditions were true. Ideal usage of TRIGGER_CONDITION uses only // a single condition check per statement. 
Multiple checks will conform to the C rules of evaluate the second statement only if // required, causing jumps, defeating the purpose. TRIGGER_CONDITION should be used more with (<, <=, >, >=, ==) than with // bit-checking and !=. For such cases, using math directly is better. // //================================================================================================================================== #ifndef TRIGGER_CONDITION #if defined( _X86_ ) // x86 preferred syntax that generates math: #define TRIGGER_CONDITION( uInvalid, Condition ) { uInvalid |= SIZE_T( Condition ); } #else // x64 preferred syntax that generates cmov: #define TRIGGER_CONDITION( uInvalid, Condition ) if (Condition) { uInvalid = 1; } #endif #endif //---------------------------------------------------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------------------------------------------------- CSubresourceSubset::CSubresourceSubset( UINT8 NumMips, UINT16 NumArraySlices, UINT8 NumPlanes, UINT8 FirstMip, UINT16 FirstArraySlice, UINT8 FirstPlane ) noexcept : m_BeginArray( FirstArraySlice ), m_EndArray( FirstArraySlice + NumArraySlices ), m_BeginMip( FirstMip ), m_EndMip( FirstMip + NumMips ), m_BeginPlane( FirstPlane ), m_EndPlane( FirstPlane + NumPlanes ) { assert(NumMips > 0 && NumArraySlices > 0 && NumPlanes > 0); assert(NumNonExtendedSubresources() > 0 && NumExtendedSubresources() > 0); } //---------------------------------------------------------------------------------------------------------------------------------- CSubresourceSubset::CSubresourceSubset( const CBufferView& ) : m_BeginArray( 0 ), m_EndArray( 1 ), m_BeginMip( 0 ), m_EndMip( 1 ), m_BeginPlane(0), m_EndPlane(1) { } //---------------------------------------------------------------------------------------------------------------------------------- CSubresourceSubset::CSubresourceSubset( const 
D3D11_SHADER_RESOURCE_VIEW_DESC1& Desc ) noexcept : m_BeginArray( 0 ), m_EndArray( 1 ), m_BeginMip( 0 ), m_EndMip( 1 ), m_BeginPlane(0), m_EndPlane(1) { switch (Desc.ViewDimension) { default: ASSUME( 0 && "Corrupt Resource Type on Shader Resource View" ); break; case (D3D11_SRV_DIMENSION_BUFFER): case (D3D11_SRV_DIMENSION_BUFFEREX): break; case (D3D11_SRV_DIMENSION_TEXTURE1D): m_BeginMip = UINT8( Desc.Texture1D.MostDetailedMip ); m_EndMip = UINT8( m_BeginMip + Desc.Texture1D.MipLevels ); break; case (D3D11_SRV_DIMENSION_TEXTURE1DARRAY): m_BeginArray = UINT16( Desc.Texture1DArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture1DArray.ArraySize ); m_BeginMip = UINT8( Desc.Texture1DArray.MostDetailedMip ); m_EndMip = UINT8( m_BeginMip + Desc.Texture1DArray.MipLevels ); break; case (D3D11_SRV_DIMENSION_TEXTURE2D): m_BeginMip = UINT8( Desc.Texture2D.MostDetailedMip ); m_EndMip = UINT8( m_BeginMip + Desc.Texture2D.MipLevels ); m_BeginPlane = UINT8(Desc.Texture2D.PlaneSlice); m_EndPlane = UINT8(Desc.Texture2D.PlaneSlice + 1); break; case (D3D11_SRV_DIMENSION_TEXTURE2DARRAY): m_BeginArray = UINT16( Desc.Texture2DArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture2DArray.ArraySize ); m_BeginMip = UINT8( Desc.Texture2DArray.MostDetailedMip ); m_EndMip = UINT8( m_BeginMip + Desc.Texture2DArray.MipLevels ); m_BeginPlane = UINT8(Desc.Texture2DArray.PlaneSlice); m_EndPlane = UINT8(Desc.Texture2DArray.PlaneSlice + 1); break; case (D3D11_SRV_DIMENSION_TEXTURE2DMS): break; case (D3D11_SRV_DIMENSION_TEXTURE2DMSARRAY): m_BeginArray = UINT16( Desc.Texture2DMSArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture2DMSArray.ArraySize ); break; case (D3D11_SRV_DIMENSION_TEXTURE3D): m_EndArray = UINT16( -1 ); //all slices m_BeginMip = UINT8( Desc.Texture3D.MostDetailedMip ); m_EndMip = UINT8( m_BeginMip + Desc.Texture3D.MipLevels ); break; case (D3D11_SRV_DIMENSION_TEXTURECUBE): m_BeginMip = UINT8( 
Desc.TextureCube.MostDetailedMip ); m_EndMip = UINT8( m_BeginMip + Desc.TextureCube.MipLevels ); m_BeginArray = 0; m_EndArray = 6; break; case (D3D11_SRV_DIMENSION_TEXTURECUBEARRAY): m_BeginArray = UINT16( Desc.TextureCubeArray.First2DArrayFace ); m_EndArray = UINT16( m_BeginArray + Desc.TextureCubeArray.NumCubes * 6 ); m_BeginMip = UINT8( Desc.TextureCubeArray.MostDetailedMip ); m_EndMip = UINT8( m_BeginMip + Desc.TextureCubeArray.MipLevels ); break; } } //---------------------------------------------------------------------------------------------------------------------------------- CSubresourceSubset::CSubresourceSubset( const D3D11_UNORDERED_ACCESS_VIEW_DESC1& Desc ) noexcept : m_BeginArray( 0 ), m_EndArray( 1 ), m_BeginMip( 0 ), m_BeginPlane(0), m_EndPlane(1) { switch (Desc.ViewDimension) { default: ASSUME( 0 && "Corrupt Resource Type on Unordered Access View" ); break; case (D3D11_UAV_DIMENSION_BUFFER): break; case (D3D11_UAV_DIMENSION_TEXTURE1D): m_BeginMip = UINT8( Desc.Texture1D.MipSlice ); break; case (D3D11_UAV_DIMENSION_TEXTURE1DARRAY): m_BeginArray = UINT16( Desc.Texture1DArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture1DArray.ArraySize ); m_BeginMip = UINT8( Desc.Texture1DArray.MipSlice ); break; case (D3D11_UAV_DIMENSION_TEXTURE2D): m_BeginMip = UINT8( Desc.Texture2D.MipSlice ); m_BeginPlane = UINT8(Desc.Texture2D.PlaneSlice); m_EndPlane = UINT8(Desc.Texture2D.PlaneSlice + 1); break; case (D3D11_UAV_DIMENSION_TEXTURE2DARRAY): m_BeginArray = UINT16( Desc.Texture2DArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture2DArray.ArraySize ); m_BeginMip = UINT8( Desc.Texture2DArray.MipSlice ); m_BeginPlane = UINT8(Desc.Texture2DArray.PlaneSlice); m_EndPlane = UINT8(Desc.Texture2DArray.PlaneSlice + 1); break; case (D3D11_UAV_DIMENSION_TEXTURE3D): m_BeginArray = UINT16( Desc.Texture3D.FirstWSlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture3D.WSize ); m_BeginMip = UINT8( Desc.Texture3D.MipSlice ); break; } 
m_EndMip = m_BeginMip + 1; } //---------------------------------------------------------------------------------------------------------------------------------- CSubresourceSubset::CSubresourceSubset( const D3D11_RENDER_TARGET_VIEW_DESC1& Desc ) noexcept : m_BeginArray( 0 ), m_EndArray( 1 ), m_BeginMip( 0 ), m_BeginPlane(0), m_EndPlane(1) { switch (Desc.ViewDimension) { default: ASSUME( 0 && "Corrupt Resource Type on Render Target View" ); break; case (D3D11_RTV_DIMENSION_BUFFER): break; case (D3D11_RTV_DIMENSION_TEXTURE1D): m_BeginMip = UINT8( Desc.Texture1D.MipSlice ); break; case (D3D11_RTV_DIMENSION_TEXTURE1DARRAY): m_BeginArray = UINT16( Desc.Texture1DArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture1DArray.ArraySize ); m_BeginMip = UINT8( Desc.Texture1DArray.MipSlice ); break; case (D3D11_RTV_DIMENSION_TEXTURE2D): m_BeginMip = UINT8( Desc.Texture2D.MipSlice ); m_BeginPlane = UINT8(Desc.Texture2D.PlaneSlice); m_EndPlane = UINT8(Desc.Texture2D.PlaneSlice + 1); break; case (D3D11_RTV_DIMENSION_TEXTURE2DMS): break; case (D3D11_RTV_DIMENSION_TEXTURE2DARRAY): m_BeginArray = UINT16( Desc.Texture2DArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture2DArray.ArraySize ); m_BeginMip = UINT8( Desc.Texture2DArray.MipSlice ); m_BeginPlane = UINT8(Desc.Texture2DArray.PlaneSlice); m_EndPlane = UINT8(Desc.Texture2DArray.PlaneSlice + 1); break; case (D3D11_RTV_DIMENSION_TEXTURE2DMSARRAY): m_BeginArray = UINT16( Desc.Texture2DMSArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture2DMSArray.ArraySize ); break; case (D3D11_RTV_DIMENSION_TEXTURE3D): m_BeginArray = UINT16( Desc.Texture3D.FirstWSlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture3D.WSize ); m_BeginMip = UINT8( Desc.Texture3D.MipSlice ); break; } m_EndMip = m_BeginMip + 1; } //---------------------------------------------------------------------------------------------------------------------------------- CSubresourceSubset::CSubresourceSubset( 
const D3D11_DEPTH_STENCIL_VIEW_DESC& Desc ) noexcept : m_BeginArray( 0 ), m_EndArray( 1 ), m_BeginMip( 0 ), m_BeginPlane(0), m_EndPlane(1) { switch (Desc.ViewDimension) { default: ASSUME( 0 && "Corrupt Resource Type on Depth Stencil View" ); break; case (D3D11_DSV_DIMENSION_TEXTURE1D): m_BeginMip = UINT8( Desc.Texture1D.MipSlice ); break; case (D3D11_DSV_DIMENSION_TEXTURE1DARRAY): m_BeginArray = UINT16( Desc.Texture1DArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture1DArray.ArraySize ); m_BeginMip = UINT8( Desc.Texture1DArray.MipSlice ); break; case (D3D11_DSV_DIMENSION_TEXTURE2D): m_BeginMip = UINT8( Desc.Texture2D.MipSlice ); break; case (D3D11_DSV_DIMENSION_TEXTURE2DMS): break; case (D3D11_DSV_DIMENSION_TEXTURE2DARRAY): m_BeginArray = UINT16( Desc.Texture2DArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture2DArray.ArraySize ); m_BeginMip = UINT8( Desc.Texture2DArray.MipSlice ); break; case (D3D11_DSV_DIMENSION_TEXTURE2DMSARRAY): m_BeginArray = UINT16( Desc.Texture2DMSArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture2DMSArray.ArraySize ); break; } m_EndMip = m_BeginMip + 1; } //---------------------------------------------------------------------------------------------------------------------------------- CSubresourceSubset::CSubresourceSubset( const D3D12_SHADER_RESOURCE_VIEW_DESC& Desc ) noexcept : m_BeginArray( 0 ), m_EndArray( 1 ), m_BeginMip( 0 ), m_EndMip( 1 ), m_BeginPlane(0), m_EndPlane(1) { switch (Desc.ViewDimension) { default: ASSUME( 0 && "Corrupt Resource Type on Shader Resource View" ); break; case (D3D12_SRV_DIMENSION_BUFFER): break; case (D3D12_SRV_DIMENSION_TEXTURE1D): m_BeginMip = UINT8( Desc.Texture1D.MostDetailedMip ); m_EndMip = UINT8( m_BeginMip + Desc.Texture1D.MipLevels ); break; case (D3D12_SRV_DIMENSION_TEXTURE1DARRAY): m_BeginArray = UINT16( Desc.Texture1DArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture1DArray.ArraySize ); m_BeginMip = UINT8( 
Desc.Texture1DArray.MostDetailedMip ); m_EndMip = UINT8( m_BeginMip + Desc.Texture1DArray.MipLevels ); break; case (D3D12_SRV_DIMENSION_TEXTURE2D): m_BeginMip = UINT8( Desc.Texture2D.MostDetailedMip ); m_EndMip = UINT8( m_BeginMip + Desc.Texture2D.MipLevels ); m_BeginPlane = UINT8(Desc.Texture2D.PlaneSlice); m_EndPlane = UINT8(Desc.Texture2D.PlaneSlice + 1); break; case (D3D12_SRV_DIMENSION_TEXTURE2DARRAY): m_BeginArray = UINT16( Desc.Texture2DArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture2DArray.ArraySize ); m_BeginMip = UINT8( Desc.Texture2DArray.MostDetailedMip ); m_EndMip = UINT8( m_BeginMip + Desc.Texture2DArray.MipLevels ); m_BeginPlane = UINT8(Desc.Texture2DArray.PlaneSlice); m_EndPlane = UINT8(Desc.Texture2DArray.PlaneSlice + 1); break; case (D3D12_SRV_DIMENSION_TEXTURE2DMS): break; case (D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY): m_BeginArray = UINT16( Desc.Texture2DMSArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture2DMSArray.ArraySize ); break; case (D3D12_SRV_DIMENSION_TEXTURE3D): m_EndArray = UINT16( -1 ); //all slices m_BeginMip = UINT8( Desc.Texture3D.MostDetailedMip ); m_EndMip = UINT8( m_BeginMip + Desc.Texture3D.MipLevels ); break; case (D3D12_SRV_DIMENSION_TEXTURECUBE): m_BeginMip = UINT8( Desc.TextureCube.MostDetailedMip ); m_EndMip = UINT8( m_BeginMip + Desc.TextureCube.MipLevels ); m_BeginArray = 0; m_EndArray = 6; break; case (D3D12_SRV_DIMENSION_TEXTURECUBEARRAY): m_BeginArray = UINT16( Desc.TextureCubeArray.First2DArrayFace ); m_EndArray = UINT16( m_BeginArray + Desc.TextureCubeArray.NumCubes * 6 ); m_BeginMip = UINT8( Desc.TextureCubeArray.MostDetailedMip ); m_EndMip = UINT8( m_BeginMip + Desc.TextureCubeArray.MipLevels ); break; } } //---------------------------------------------------------------------------------------------------------------------------------- CSubresourceSubset::CSubresourceSubset( const D3D12_UNORDERED_ACCESS_VIEW_DESC& Desc ) noexcept : m_BeginArray( 0 ), m_EndArray( 1 
), m_BeginMip( 0 ), m_BeginPlane(0), m_EndPlane(1) { switch (Desc.ViewDimension) { default: ASSUME( 0 && "Corrupt Resource Type on Unordered Access View" ); break; case (D3D12_UAV_DIMENSION_BUFFER): break; case (D3D12_UAV_DIMENSION_TEXTURE1D): m_BeginMip = UINT8( Desc.Texture1D.MipSlice ); break; case (D3D12_UAV_DIMENSION_TEXTURE1DARRAY): m_BeginArray = UINT16( Desc.Texture1DArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture1DArray.ArraySize ); m_BeginMip = UINT8( Desc.Texture1DArray.MipSlice ); break; case (D3D12_UAV_DIMENSION_TEXTURE2D): m_BeginMip = UINT8( Desc.Texture2D.MipSlice ); m_BeginPlane = UINT8(Desc.Texture2D.PlaneSlice); m_EndPlane = UINT8(Desc.Texture2D.PlaneSlice + 1); break; case (D3D12_UAV_DIMENSION_TEXTURE2DARRAY): m_BeginArray = UINT16( Desc.Texture2DArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture2DArray.ArraySize ); m_BeginMip = UINT8( Desc.Texture2DArray.MipSlice ); m_BeginPlane = UINT8(Desc.Texture2DArray.PlaneSlice); m_EndPlane = UINT8(Desc.Texture2DArray.PlaneSlice + 1); break; case (D3D12_UAV_DIMENSION_TEXTURE3D): m_BeginArray = UINT16( Desc.Texture3D.FirstWSlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture3D.WSize ); m_BeginMip = UINT8( Desc.Texture3D.MipSlice ); break; } m_EndMip = m_BeginMip + 1; } //---------------------------------------------------------------------------------------------------------------------------------- CSubresourceSubset::CSubresourceSubset( const D3D12_RENDER_TARGET_VIEW_DESC& Desc ) noexcept : m_BeginArray( 0 ), m_EndArray( 1 ), m_BeginMip( 0 ), m_BeginPlane(0), m_EndPlane(1) { switch (Desc.ViewDimension) { default: ASSUME( 0 && "Corrupt Resource Type on Render Target View" ); break; case (D3D12_RTV_DIMENSION_BUFFER): break; case (D3D12_RTV_DIMENSION_TEXTURE1D): m_BeginMip = UINT8( Desc.Texture1D.MipSlice ); break; case (D3D12_RTV_DIMENSION_TEXTURE1DARRAY): m_BeginArray = UINT16( Desc.Texture1DArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + 
Desc.Texture1DArray.ArraySize ); m_BeginMip = UINT8( Desc.Texture1DArray.MipSlice ); break; case (D3D12_RTV_DIMENSION_TEXTURE2D): m_BeginMip = UINT8( Desc.Texture2D.MipSlice ); m_BeginPlane = UINT8(Desc.Texture2D.PlaneSlice); m_EndPlane = UINT8(Desc.Texture2D.PlaneSlice + 1); break; case (D3D12_RTV_DIMENSION_TEXTURE2DMS): break; case (D3D12_RTV_DIMENSION_TEXTURE2DARRAY): m_BeginArray = UINT16( Desc.Texture2DArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture2DArray.ArraySize ); m_BeginMip = UINT8( Desc.Texture2DArray.MipSlice ); m_BeginPlane = UINT8(Desc.Texture2DArray.PlaneSlice); m_EndPlane = UINT8(Desc.Texture2DArray.PlaneSlice + 1); break; case (D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY): m_BeginArray = UINT16( Desc.Texture2DMSArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture2DMSArray.ArraySize ); break; case (D3D12_RTV_DIMENSION_TEXTURE3D): m_BeginArray = UINT16( Desc.Texture3D.FirstWSlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture3D.WSize ); m_BeginMip = UINT8( Desc.Texture3D.MipSlice ); break; } m_EndMip = m_BeginMip + 1; } //---------------------------------------------------------------------------------------------------------------------------------- CSubresourceSubset::CSubresourceSubset( const D3D12_DEPTH_STENCIL_VIEW_DESC& Desc ) noexcept : m_BeginArray( 0 ), m_EndArray( 1 ), m_BeginMip( 0 ), m_BeginPlane(0), m_EndPlane(1) { switch (Desc.ViewDimension) { default: ASSUME( 0 && "Corrupt Resource Type on Depth Stencil View" ); break; case (D3D12_DSV_DIMENSION_TEXTURE1D): m_BeginMip = UINT8( Desc.Texture1D.MipSlice ); break; case (D3D12_DSV_DIMENSION_TEXTURE1DARRAY): m_BeginArray = UINT16( Desc.Texture1DArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture1DArray.ArraySize ); m_BeginMip = UINT8( Desc.Texture1DArray.MipSlice ); break; case (D3D12_DSV_DIMENSION_TEXTURE2D): m_BeginMip = UINT8( Desc.Texture2D.MipSlice ); break; case (D3D12_DSV_DIMENSION_TEXTURE2DMS): break; case 
(D3D12_DSV_DIMENSION_TEXTURE2DARRAY): m_BeginArray = UINT16( Desc.Texture2DArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture2DArray.ArraySize ); m_BeginMip = UINT8( Desc.Texture2DArray.MipSlice ); break; case (D3D12_DSV_DIMENSION_TEXTURE2DMSARRAY): m_BeginArray = UINT16( Desc.Texture2DMSArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture2DMSArray.ArraySize ); break; } m_EndMip = m_BeginMip + 1; } //---------------------------------------------------------------------------------------------------------------------------------- CSubresourceSubset::CSubresourceSubset( const D3D11_VIDEO_DECODER_OUTPUT_VIEW_DESC& Desc, DXGI_FORMAT ResourceFormat ) noexcept : m_BeginArray( 0 ), m_EndArray( 1 ), m_BeginMip( 0 ), m_EndMip( 1 ), m_BeginPlane(0), m_EndPlane((UINT8)CD3D11FormatHelper::NonOpaquePlaneCount(ResourceFormat)) { switch (Desc.ViewDimension) { default: ASSUME( 0 && "Corrupt Resource Type on Video Decoder Output View" ); break; case (D3D11_VDOV_DIMENSION_TEXTURE2D): m_BeginMip = 0; m_EndMip = 1; m_BeginArray = UINT16( Desc.Texture2D.ArraySlice ); m_EndArray = m_BeginArray + 1; break; } } //---------------------------------------------------------------------------------------------------------------------------------- CSubresourceSubset::CSubresourceSubset( const D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC& Desc, DXGI_FORMAT ResourceFormat ) noexcept : m_BeginArray( 0 ), m_EndArray( 1 ), m_BeginMip( 0 ), m_EndMip( 1 ), m_BeginPlane(0), m_EndPlane((UINT8)CD3D11FormatHelper::NonOpaquePlaneCount(ResourceFormat)) { switch (Desc.ViewDimension) { default: ASSUME( 0 && "Corrupt Resource Type on Video Processor Input View" ); break; case (D3D11_VPIV_DIMENSION_TEXTURE2D): m_BeginMip = UINT8( Desc.Texture2D.MipSlice ); m_EndMip = UINT8( m_BeginMip + 1 ); m_BeginArray = UINT16( Desc.Texture2D.ArraySlice ); m_EndArray = m_BeginArray + 1; break; } } 
//---------------------------------------------------------------------------------------------------------------------------------- CSubresourceSubset::CSubresourceSubset( const D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC& Desc, DXGI_FORMAT ResourceFormat ) noexcept : m_BeginArray( 0 ), m_EndArray( 1 ), m_BeginMip( 0 ), m_EndMip( 1 ), m_BeginPlane(0), m_EndPlane((UINT8)CD3D11FormatHelper::NonOpaquePlaneCount(ResourceFormat)) { switch (Desc.ViewDimension) { default: ASSUME( 0 && "Corrupt Resource Type on Video Processor Output View" ); break; case (D3D11_VPOV_DIMENSION_TEXTURE2D): m_BeginMip = UINT8( Desc.Texture2D.MipSlice ); m_EndMip = UINT8( m_BeginMip + 1 ); break; case (D3D11_VPOV_DIMENSION_TEXTURE2DARRAY): m_BeginArray = UINT16( Desc.Texture2DArray.FirstArraySlice ); m_EndArray = UINT16( m_BeginArray + Desc.Texture2DArray.ArraySize ); m_BeginMip = UINT8( Desc.Texture2DArray.MipSlice ); m_EndMip = UINT8( m_BeginMip + 1 ); break; } } //---------------------------------------------------------------------------------------------------------------------------------- CSubresourceSubset::CSubresourceSubset(const VIDEO_DECODER_OUTPUT_VIEW_DESC_INTERNAL& Desc) noexcept : m_BeginArray(UINT16(Desc.ArraySlice)), m_EndArray(UINT16(Desc.ArraySlice + 1)), m_BeginMip(0), m_EndMip(1), m_BeginPlane(0), m_EndPlane((UINT8)CD3D11FormatHelper::NonOpaquePlaneCount(Desc.Format)) { } //---------------------------------------------------------------------------------------------------------------------------------- CSubresourceSubset::CSubresourceSubset(const VIDEO_PROCESSOR_INPUT_VIEW_DESC_INTERNAL& Desc) noexcept : m_BeginArray(UINT16(Desc.ArraySlice)), m_EndArray(UINT16(Desc.ArraySlice + 1)), m_BeginMip(UINT8(Desc.MipSlice)), m_EndMip(UINT8(Desc.MipSlice + 1)), m_BeginPlane(0), m_EndPlane((UINT8)CD3D11FormatHelper::NonOpaquePlaneCount(Desc.Format)) { } //---------------------------------------------------------------------------------------------------------------------------------- 
CSubresourceSubset::CSubresourceSubset(const VIDEO_PROCESSOR_OUTPUT_VIEW_DESC_INTERNAL& Desc) noexcept : m_BeginArray(UINT16(Desc.FirstArraySlice)), m_EndArray(UINT16(Desc.FirstArraySlice + Desc.ArraySize)), m_BeginMip(UINT8(Desc.MipSlice)), m_EndMip(UINT8(Desc.MipSlice + 1)), m_BeginPlane(0), m_EndPlane((UINT8)CD3D11FormatHelper::NonOpaquePlaneCount(Desc.Format)) { } //---------------------------------------------------------------------------------------------------------------------------------- UINT CSubresourceSubset::Mask() const noexcept { // This only works for views that only reference subresources < 32. // This is sufficient for the YUV/Decode SwapChain case where it is used. assert(m_BeginMip == 0); assert(m_EndMip == 1); assert(m_EndArray <= 32); assert(m_EndArray >= 1); UINT result = (2 << (m_EndArray-1)) - (1 << m_BeginArray); #if DBG for (unsigned i = 0; i < 32; ++i) { assert( (!!(result & (1< i) ); } assert( ((2 << 31) - 1) == -1 ); #endif return result; } //---------------------------------------------------------------------------------------------------------------------------------- SIZE_T CSubresourceSubset::DoesNotOverlap( const CSubresourceSubset& other ) const noexcept { SIZE_T uDoNotOverlap = 0; TRIGGER_CONDITION( uDoNotOverlap, m_EndArray <= other.m_BeginArray ); TRIGGER_CONDITION( uDoNotOverlap, other.m_EndArray <= m_BeginArray ); TRIGGER_CONDITION( uDoNotOverlap, m_EndMip <= other.m_BeginMip ); TRIGGER_CONDITION( uDoNotOverlap, other.m_EndMip <= m_BeginMip ); TRIGGER_CONDITION( uDoNotOverlap, m_EndPlane <= other.m_BeginPlane ); TRIGGER_CONDITION( uDoNotOverlap, other.m_EndPlane <= m_BeginPlane ); return uDoNotOverlap; } //---------------------------------------------------------------------------------------------------------------------------------- UINT CSubresourceSubset::NumNonExtendedSubresources() const noexcept { return (m_EndArray - m_BeginArray) * (m_EndMip - m_BeginMip); } 
//---------------------------------------------------------------------------------------------------------------------------------- UINT CSubresourceSubset::NumExtendedSubresources() const noexcept { return (m_EndArray - m_BeginArray) * (m_EndMip - m_BeginMip) * (m_EndPlane - m_BeginPlane); } //================================================================================================================================== // CViewSubresourceSubset // Extends CSubresourceSubset to support iterating over subresource ranges //================================================================================================================================== //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceSubset( CSubresourceSubset const& Subresources, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount ) : CSubresourceSubset( Subresources ) , m_MipLevels( MipLevels ) , m_ArraySlices( ArraySize ) , m_PlaneCount( PlaneCount ) { Reduce(); } //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceSubset( const CBufferView& ) : CSubresourceSubset( CBufferView() ) , m_MipLevels( 1 ) , m_ArraySlices( 1 ) , m_PlaneCount( 1 ) { } //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceSubset( const D3D11_SHADER_RESOURCE_VIEW_DESC1& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount ) : CSubresourceSubset( Desc ) , m_MipLevels( MipLevels ) , m_ArraySlices( ArraySize ) , m_PlaneCount( PlaneCount ) { if (Desc.ViewDimension == D3D11_SRV_DIMENSION_TEXTURE3D) { assert(m_BeginArray == 0); m_EndArray = 1; } // When this class is used by 11on12 for depthstencil formats, it treats them as planar if (m_PlaneCount == 2) { switch 
(Desc.Format) { case DXGI_FORMAT_R24_UNORM_X8_TYPELESS: case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS: m_BeginPlane = 0; m_EndPlane = 1; break; case DXGI_FORMAT_X24_TYPELESS_G8_UINT: case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT: m_BeginPlane = 1; m_EndPlane = 2; break; default: assert(!CD3D11FormatHelper::FamilySupportsStencil(Desc.Format)); } } Reduce(); } //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceSubset( const D3D11_UNORDERED_ACCESS_VIEW_DESC1& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount ) : CSubresourceSubset( Desc ) , m_MipLevels( MipLevels ) , m_ArraySlices( ArraySize ) , m_PlaneCount( PlaneCount ) { if (Desc.ViewDimension == D3D11_UAV_DIMENSION_TEXTURE3D) { m_BeginArray = 0; m_EndArray = 1; } Reduce(); } //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceSubset( const D3D11_DEPTH_STENCIL_VIEW_DESC& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount) : CSubresourceSubset( Desc ) , m_MipLevels( MipLevels ) , m_ArraySlices( ArraySize ) , m_PlaneCount( PlaneCount ) { Reduce(); } //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceSubset( const D3D11_RENDER_TARGET_VIEW_DESC1& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount ) : CSubresourceSubset( Desc ) , m_MipLevels( MipLevels ) , m_ArraySlices( ArraySize ) , m_PlaneCount( PlaneCount ) { if (Desc.ViewDimension == D3D11_RTV_DIMENSION_TEXTURE3D) { m_BeginArray = 0; m_EndArray = 1; } Reduce(); } //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceSubset( const D3D12_SHADER_RESOURCE_VIEW_DESC& Desc, UINT8 
MipLevels, UINT16 ArraySize, UINT8 PlaneCount ) : CSubresourceSubset( Desc ) , m_MipLevels( MipLevels ) , m_ArraySlices( ArraySize ) , m_PlaneCount( PlaneCount ) { if (Desc.ViewDimension == D3D12_SRV_DIMENSION_TEXTURE3D) { assert(m_BeginArray == 0); m_EndArray = 1; } Reduce(); } //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceSubset( const D3D12_UNORDERED_ACCESS_VIEW_DESC& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount ) : CSubresourceSubset( Desc ) , m_MipLevels( MipLevels ) , m_ArraySlices( ArraySize ) , m_PlaneCount( PlaneCount ) { if (Desc.ViewDimension == D3D12_UAV_DIMENSION_TEXTURE3D) { m_BeginArray = 0; m_EndArray = 1; } Reduce(); } //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceSubset(const D3D12_DEPTH_STENCIL_VIEW_DESC& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount, DepthStencilMode DSMode) : CSubresourceSubset( Desc ) , m_MipLevels( MipLevels ) , m_ArraySlices( ArraySize ) , m_PlaneCount( PlaneCount ) { // When this class is used by 11on12 for depthstencil formats, it treats them as planar // When binding DSVs of planar resources, additional view subresource subsets will be constructed if (m_PlaneCount == 2) { if (DSMode != ReadOrWrite) { bool bWritable = DSMode == WriteOnly; bool bDepth = !(Desc.Flags & static_cast(D3D11_DSV_READ_ONLY_DEPTH)) == bWritable; bool bStencil = !(Desc.Flags & static_cast(D3D11_DSV_READ_ONLY_STENCIL)) == bWritable; m_BeginPlane = (bDepth ? 0 : 1); m_EndPlane = (bStencil ? 
2 : 1); } else { m_BeginPlane = 0; m_EndPlane = 2; } } Reduce(); } //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceSubset( const D3D12_RENDER_TARGET_VIEW_DESC& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount ) : CSubresourceSubset( Desc ) , m_MipLevels( MipLevels ) , m_ArraySlices( ArraySize ) , m_PlaneCount( PlaneCount ) { if (Desc.ViewDimension == D3D12_RTV_DIMENSION_TEXTURE3D) { m_BeginArray = 0; m_EndArray = 1; } Reduce(); } //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceSubset(const VIDEO_DECODER_OUTPUT_VIEW_DESC_INTERNAL& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount) : CSubresourceSubset(Desc) , m_MipLevels(MipLevels) , m_ArraySlices(ArraySize) , m_PlaneCount(PlaneCount) { Reduce(); } //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceSubset(const VIDEO_PROCESSOR_INPUT_VIEW_DESC_INTERNAL& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount) : CSubresourceSubset(Desc) , m_MipLevels(MipLevels) , m_ArraySlices(ArraySize) , m_PlaneCount(PlaneCount) { Reduce(); } //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceSubset(const VIDEO_PROCESSOR_OUTPUT_VIEW_DESC_INTERNAL& Desc, UINT8 MipLevels, UINT16 ArraySize, UINT8 PlaneCount) : CSubresourceSubset(Desc) , m_MipLevels(MipLevels) , m_ArraySlices(ArraySize) , m_PlaneCount(PlaneCount) { // Do not allow reduction for video processor output views due to manual manipulation for stereo views // Reduce(); } 
//---------------------------------------------------------------------------------------------------------------------------------- // Allows constructing a CViewSubresourceSubset from a core layer C*View object template /*static*/ CViewSubresourceSubset CViewSubresourceSubset::FromView( const T* pView ) { return CViewSubresourceSubset( pView->Desc(), static_cast(pView->Resource()->MipLevels()), static_cast(pView->Resource()->ArraySize()), static_cast(pView->Resource()->PlaneCount()) ); } //---------------------------------------------------------------------------------------------------------------------------------- // Strictly for performance, allows coalescing contiguous subresource ranges into a single range void CViewSubresourceSubset::Reduce() { if ( m_BeginMip == 0 && m_EndMip == m_MipLevels && m_BeginArray == 0 && m_EndArray == m_ArraySlices) { UINT startSubresource = ComposeSubresourceIdxExtended(0, 0, m_BeginPlane, m_MipLevels, m_ArraySlices); UINT endSubresource = ComposeSubresourceIdxExtended(0, 0, m_EndPlane, m_MipLevels, m_ArraySlices); // Only coalesce if the full-resolution UINTs fit in the UINT8s used for storage here if (endSubresource < static_cast(-1)) { m_BeginArray = 0; m_EndArray = 1; m_BeginPlane = 0; m_EndPlane = 1; m_BeginMip = static_cast(startSubresource); m_EndMip = static_cast(endSubresource); } } } //---------------------------------------------------------------------------------------------------------------------------------- bool CViewSubresourceSubset::IsWholeResource() const { return m_BeginMip == 0 && m_BeginArray == 0 && m_BeginPlane == 0 && (m_EndMip * m_EndArray * m_EndPlane == m_MipLevels * m_ArraySlices * m_PlaneCount); } //---------------------------------------------------------------------------------------------------------------------------------- bool CViewSubresourceSubset::IsEmpty() const { return m_BeginMip == m_EndMip || m_BeginArray == m_EndArray || m_BeginPlane == m_EndPlane; } 
//---------------------------------------------------------------------------------------------------------------------------------- UINT CViewSubresourceSubset::MinSubresource() const { return (*begin()).first; } //---------------------------------------------------------------------------------------------------------------------------------- UINT CViewSubresourceSubset::MaxSubresource() const { return (*(--end())).second; } //---------------------------------------------------------------------------------------------------------------------------------- UINT CViewSubresourceSubset::ArraySize() const { return m_ArraySlices; } //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceIterator CViewSubresourceSubset::begin() const { return CViewSubresourceIterator(*this, m_BeginArray, m_BeginPlane); } //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceIterator CViewSubresourceSubset::end() const { return CViewSubresourceIterator(*this, m_BeginArray, m_EndPlane); } //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceIterator::CViewSubresourceIterator(CViewSubresourceSubset const& SubresourceSet, UINT16 ArraySlice, UINT8 PlaneSlice) : m_Subresources(SubresourceSet) , m_CurrentArraySlice(ArraySlice) , m_CurrentPlaneSlice(PlaneSlice) { } //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceIterator& CViewSubresourceSubset::CViewSubresourceIterator::operator++() { assert( m_CurrentArraySlice < m_Subresources.m_EndArray ); if (++m_CurrentArraySlice >= m_Subresources.m_EndArray ) { assert( 
m_CurrentPlaneSlice < m_Subresources.m_EndPlane ); m_CurrentArraySlice= m_Subresources.m_BeginArray; ++m_CurrentPlaneSlice; } return *this; } //---------------------------------------------------------------------------------------------------------------------------------- CViewSubresourceSubset::CViewSubresourceIterator& CViewSubresourceSubset::CViewSubresourceIterator::operator--() { if (m_CurrentArraySlice <= m_Subresources.m_BeginArray) { m_CurrentArraySlice = m_Subresources.m_EndArray; assert( m_CurrentPlaneSlice > m_Subresources.m_BeginPlane ); --m_CurrentPlaneSlice; } --m_CurrentArraySlice; return *this; } //---------------------------------------------------------------------------------------------------------------------------------- bool CViewSubresourceSubset::CViewSubresourceIterator::operator==(CViewSubresourceIterator const& other) const { return &other.m_Subresources == &m_Subresources && other.m_CurrentArraySlice == m_CurrentArraySlice && other.m_CurrentPlaneSlice == m_CurrentPlaneSlice; } //---------------------------------------------------------------------------------------------------------------------------------- bool CViewSubresourceSubset::CViewSubresourceIterator::operator!=(CViewSubresourceIterator const& other) const { return !(other == *this); } //---------------------------------------------------------------------------------------------------------------------------------- UINT CViewSubresourceSubset::CViewSubresourceIterator::StartSubresource() const { return ComposeSubresourceIdxExtended(m_Subresources.m_BeginMip, m_CurrentArraySlice, m_CurrentPlaneSlice, m_Subresources.m_MipLevels, m_Subresources.m_ArraySlices); } //---------------------------------------------------------------------------------------------------------------------------------- UINT CViewSubresourceSubset::CViewSubresourceIterator::EndSubresource() const { return ComposeSubresourceIdxExtended(m_Subresources.m_EndMip, m_CurrentArraySlice, m_CurrentPlaneSlice, 
m_Subresources.m_MipLevels, m_Subresources.m_ArraySlices); } //---------------------------------------------------------------------------------------------------------------------------------- std::pair CViewSubresourceSubset::CViewSubresourceIterator::operator*() const { return std::make_pair(StartSubresource(), EndSubresource()); } //---------------------------------------------------------------------------------------------------------------------------------- // Calculate either a new coordinate in the same subresource, or targeting a new subresource with the number of tiles remaining void CalcNewTileCoords(D3D11_TILED_RESOURCE_COORDINATE &Coord, UINT &NumTiles, D3D11_SUBRESOURCE_TILING const& SubresourceTiling) { assert(SubresourceTiling.StartTileIndexInOverallResource != D3D11_PACKED_TILE); UINT NumTilesInSubresource = SubresourceTiling.WidthInTiles * SubresourceTiling.HeightInTiles * SubresourceTiling.DepthInTiles; UINT NumTilesInSlice = SubresourceTiling.WidthInTiles * SubresourceTiling.HeightInTiles; UINT CoordTileOffset = Coord.X + Coord.Y * SubresourceTiling.WidthInTiles + Coord.Z * NumTilesInSlice; if (CoordTileOffset + NumTiles >= NumTilesInSubresource) { NumTiles -= (NumTilesInSubresource - CoordTileOffset); Coord = D3D11_TILED_RESOURCE_COORDINATE{0, 0, 0, Coord.Subresource + 1}; return; } CoordTileOffset += NumTiles; Coord.Z = CoordTileOffset / NumTilesInSlice; CoordTileOffset %= NumTilesInSlice; Coord.Y = CoordTileOffset / SubresourceTiling.WidthInTiles; Coord.X = CoordTileOffset % SubresourceTiling.WidthInTiles; NumTiles = 0; } //---------------------------------------------------------------------------------------------------------------------------------- CTileSubresourceSubset::CTileSubresourceSubset( const D3D11_TILED_RESOURCE_COORDINATE& StartCoord, const D3D11_TILE_REGION_SIZE& Region, D3D11_RESOURCE_DIMENSION ResDim, _In_reads_(NumStandardMips) const D3D11_SUBRESOURCE_TILING* pSubresourceTilings, UINT MipLevels, UINT NumStandardMips) : 
m_FirstSubresource(StartCoord.Subresource) , m_MipsPerSlice(MipLevels) { assert(NumStandardMips <= MipLevels); if (Region.bUseBox) { // When a box is used, the region either only targets a single subresource, // or can target the same mip from multiple array slices using the depth field m_bTargetingArraySlices = true; if (ResDim != D3D11_RESOURCE_DIMENSION_TEXTURE3D) { m_NumSubresourcesOrArraySlices = Region.Depth; } else { m_NumSubresourcesOrArraySlices = 1; } } else { m_bTargetingArraySlices = false; UINT NumTiles = Region.NumTiles; D3D11_TILED_RESOURCE_COORDINATE CoordCopy = StartCoord; while (NumTiles) { if (CoordCopy.Subresource >= NumStandardMips && NumStandardMips != MipLevels) { // Once we target the first packed mip, all packed are affected // Note: if there are packed mips, the resource cannot be arrayed m_NumSubresourcesOrArraySlices = MipLevels - m_FirstSubresource; return; } D3D12TranslationLayer::CalcNewTileCoords(CoordCopy, NumTiles, pSubresourceTilings[CoordCopy.Subresource % MipLevels]); } // If the tiles just covered the previous subresource, then the subresource here is // one past the last subresource affected by this operation, no need to convert from index to count m_NumSubresourcesOrArraySlices = CoordCopy.Subresource - m_FirstSubresource; if (CoordCopy.X != 0 || CoordCopy.Y != 0 || CoordCopy.Z != 0) { // The subresource here WAS affected, + 1 to convert from index to count ++m_NumSubresourcesOrArraySlices; } assert(m_NumSubresourcesOrArraySlices > 0); } } //---------------------------------------------------------------------------------------------------------------------------------- CTileSubresourceSubset::CIterator CTileSubresourceSubset::begin() const { return CIterator(*this, 0); } //---------------------------------------------------------------------------------------------------------------------------------- CTileSubresourceSubset::CIterator CTileSubresourceSubset::end() const { return CIterator(*this, 
m_NumSubresourcesOrArraySlices); } //---------------------------------------------------------------------------------------------------------------------------------- UINT CTileSubresourceSubset::CalcSubresource(UINT SubresourceIdx) const { assert(SubresourceIdx < m_NumSubresourcesOrArraySlices); if (m_bTargetingArraySlices) { return m_FirstSubresource + SubresourceIdx * m_MipsPerSlice; } else { return m_FirstSubresource + SubresourceIdx; } } //---------------------------------------------------------------------------------------------------------------------------------- CTileSubresourceSubset::CIterator::CIterator(CTileSubresourceSubset const& TileSubset, UINT SubresourceIdx) : m_TileSubset(TileSubset) , m_SubresourceIdx(SubresourceIdx) { } //---------------------------------------------------------------------------------------------------------------------------------- CTileSubresourceSubset::CIterator &CTileSubresourceSubset::CIterator::operator++() { assert(m_SubresourceIdx < m_TileSubset.m_NumSubresourcesOrArraySlices); ++m_SubresourceIdx; return *this; } //---------------------------------------------------------------------------------------------------------------------------------- CTileSubresourceSubset::CIterator &CTileSubresourceSubset::CIterator::operator--() { assert(m_SubresourceIdx > 0); --m_SubresourceIdx; return *this; } //---------------------------------------------------------------------------------------------------------------------------------- bool CTileSubresourceSubset::CIterator::operator==(CIterator const& other) const { return (&m_TileSubset == &other.m_TileSubset && m_SubresourceIdx == other.m_SubresourceIdx); } bool CTileSubresourceSubset::CIterator::operator!=(CIterator const& other) const { return !(*this == other); } //---------------------------------------------------------------------------------------------------------------------------------- UINT CTileSubresourceSubset::CIterator::operator*() const { return 
m_TileSubset.CalcSubresource(m_SubresourceIdx); } }; ================================================ FILE: src/SwapChainHelper.cpp ================================================ #include "pch.h" #include "SwapChainHelper.hpp" namespace D3D12TranslationLayer { SwapChainHelper::SwapChainHelper( IDXGISwapChain3* swapChain ) : m_swapChain( swapChain ) { } HRESULT SwapChainHelper::StandardPresent( ImmediateContext& context, D3DKMT_PRESENT *pKMTPresent, Resource& presentingResource ) { unique_comptr backBuffer; m_swapChain->GetBuffer( m_swapChain->GetCurrentBackBufferIndex(), IID_PPV_ARGS( &backBuffer ) ); D3D12TranslationLayer::ResourceCreationArgs destArgs = *presentingResource.Parent(); destArgs.m_appDesc.m_Samples = 1; destArgs.m_appDesc.m_bindFlags = D3D12TranslationLayer::RESOURCE_BIND_RENDER_TARGET; destArgs.m_desc12.SampleDesc.Count = 1; destArgs.m_desc12.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; auto destResource = D3D12TranslationLayer::Resource::OpenResource( &context, destArgs, backBuffer.get(), D3D12TranslationLayer::DeferredDestructionType::Submission, D3D12_RESOURCE_STATE_COMMON ); D3D12_RESOURCE_STATES OperationState; if (presentingResource.AppDesc()->Samples() > 1) { context.ResourceResolveSubresource( destResource.get(), 0, &presentingResource, 0, destArgs.m_appDesc.Format() ); OperationState = D3D12_RESOURCE_STATE_RESOLVE_DEST; } else { context.ResourceCopy( destResource.get(), &presentingResource ); OperationState = D3D12_RESOURCE_STATE_COPY_DEST; } D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( backBuffer.get(), OperationState, D3D12_RESOURCE_STATE_COMMON ); context.GetGraphicsCommandList()->ResourceBarrier( 1, &Barrier ); context.Flush( D3D12TranslationLayer::COMMAND_LIST_TYPE_ALL_MASK ); HRESULT hr = m_swapChain->Present( pKMTPresent->FlipInterval, pKMTPresent->FlipInterval == 0 ? 
DXGI_PRESENT_ALLOW_TEARING : 0 ); context.GetCommandListManager( D3D12TranslationLayer::COMMAND_LIST_TYPE::GRAPHICS )->SetNeedSubmitFence(); return hr; } } ================================================ FILE: src/SwapChainManager.cpp ================================================ #include "pch.h" #include "SwapChainManager.hpp" namespace D3D12TranslationLayer { SwapChainManager::SwapChainManager( D3D12TranslationLayer::ImmediateContext& ImmCtx ) : m_ImmCtx( ImmCtx ) {} SwapChainManager::~SwapChainManager() { m_ImmCtx.WaitForCompletion( D3D12TranslationLayer::COMMAND_LIST_TYPE_ALL_MASK ); } IDXGISwapChain3* SwapChainManager::GetSwapChainForWindow(HWND hwnd, Resource& presentingResource) { auto& spSwapChain = m_SwapChains[hwnd]; auto pResourceDesc = presentingResource.AppDesc(); DXGI_SWAP_CHAIN_DESC Desc = {}; //SwapChain creation fails if using BGRX, so pretend that it's BGRA. Present still works as expected. DXGI_FORMAT format = pResourceDesc->Format() == DXGI_FORMAT_B8G8R8X8_UNORM ? 
DXGI_FORMAT_B8G8R8A8_UNORM : pResourceDesc->Format(); if (spSwapChain) { spSwapChain->GetDesc( &Desc ); if (Desc.BufferDesc.Format != format || Desc.BufferDesc.Width != pResourceDesc->Width() || Desc.BufferDesc.Height != pResourceDesc->Height()) { m_ImmCtx.WaitForCompletion( D3D12TranslationLayer::COMMAND_LIST_TYPE_ALL_MASK ); ThrowFailure( spSwapChain->ResizeBuffers( BufferCount, pResourceDesc->Width(), pResourceDesc->Height(), format, DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING ) ); } } else { Desc.BufferCount = BufferCount; Desc.BufferDesc.Format = format; Desc.BufferDesc.Width = pResourceDesc->Width(); Desc.BufferDesc.Height = pResourceDesc->Height(); Desc.OutputWindow = hwnd; Desc.Windowed = 1; Desc.SampleDesc.Count = 1; Desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; Desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; Desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; unique_comptr spFactory; ThrowFailure( CreateDXGIFactory2( 0, IID_PPV_ARGS( &spFactory ) ) ); unique_comptr spBaseSwapChain; ThrowFailure( spFactory->CreateSwapChain( m_ImmCtx.GetCommandQueue( D3D12TranslationLayer::COMMAND_LIST_TYPE::GRAPHICS ), &Desc, &spBaseSwapChain ) ); ThrowFailure( spBaseSwapChain->QueryInterface( &spSwapChain ) ); } return spSwapChain.get(); } } ================================================ FILE: src/Util.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#include "pch.h"

namespace D3D12TranslationLayer
{
// Thin forwarder to CD3D11FormatHelper::GetByteAlignment for the given format.
UINT GetByteAlignment(DXGI_FORMAT format)
{
    return CD3D11FormatHelper::GetByteAlignment(format);
}
}

#ifndef NO_IMPLEMENT_RECT_FNS
#pragma warning(disable: 4273) // Inconsistent DLL linkage - this is by design
// Avoid linking in the user32 dependency

// Writes the intersection of the two source rectangles to *prcDst.
// Returns TRUE when the intersection is non-empty; otherwise zeroes *prcDst and returns FALSE.
BOOL APIENTRY IntersectRect(
    __out LPRECT prcDst,
    __in CONST RECT *prcSrc1,
    __in CONST RECT *prcSrc2)
{
    // Horizontal extent first; an empty horizontal overlap means no intersection at all.
    prcDst->left = max(prcSrc1->left, prcSrc2->left);
    prcDst->right = min(prcSrc1->right, prcSrc2->right);
    if (prcDst->left >= prcDst->right)
    {
        *prcDst = {};
        return FALSE;
    }

    // Vertical extent.
    prcDst->top = max(prcSrc1->top, prcSrc2->top);
    prcDst->bottom = min(prcSrc1->bottom, prcSrc2->bottom);
    if (prcDst->top >= prcDst->bottom)
    {
        *prcDst = {};
        return FALSE;
    }

    return TRUE; // not empty
}

// Writes the bounding rectangle of the two source rectangles to *prcDst.
// An empty source (left >= right or top >= bottom) is ignored; when both are empty,
// *prcDst is zeroed and FALSE is returned.
BOOL APIENTRY UnionRect(
    __out LPRECT prcDst,
    __in CONST RECT *prcSrc1,
    __in CONST RECT *prcSrc2)
{
    const BOOL fFirstEmpty = ((prcSrc1->left >= prcSrc1->right) ||
        (prcSrc1->top >= prcSrc1->bottom));
    const BOOL fSecondEmpty = ((prcSrc2->left >= prcSrc2->right) ||
        (prcSrc2->top >= prcSrc2->bottom));

    if (fFirstEmpty)
    {
        if (fSecondEmpty)
        {
            *prcDst = {};
            return FALSE;
        }

        *prcDst = *prcSrc2;
        return TRUE;
    }

    if (fSecondEmpty)
    {
        *prcDst = *prcSrc1;
        return TRUE;
    }

    // Both rectangles are non-empty: take the extreme of each edge.
    prcDst->left = min(prcSrc1->left, prcSrc2->left);
    prcDst->top = min(prcSrc1->top, prcSrc2->top);
    prcDst->right = max(prcSrc1->right, prcSrc2->right);
    prcDst->bottom = max(prcSrc1->bottom, prcSrc2->bottom);

    return TRUE;
}
#endif

================================================
FILE: src/VideoDecode.cpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include "pch.h"
// NOTE(review): the header names of the next two includes are missing from this copy of the file; restore them from the repository.
#include
#include

namespace D3D12TranslationLayer
{
// Arbitrary constants needed for buffer size computation. Worst case scenario is assumed to be 8K.
const UINT MAX_WIDTH = 8192; const UINT MAX_HEIGHT = 4096; const UINT MIN_WIDTH = 256; const UINT MIN_HEIGHT = 256; const UINT MIN_ALIGN = 64; ///////////////////////////////////////// // Decoder configuration ///////////////////////////////////////// typedef struct { UINT BufferTypeCount; struct { VIDEO_DECODE_BUFFER_TYPE Type; UINT BaseSize; } Data[VIDEO_DECODE_BUFFER_TYPE_SIZEOF]; struct { UINT Num; UINT Denom; UINT MinWidth; // if clip resolution is greater than this, use multiplier, otherwise, use 1:1 ratio UINT MinHeight; } CompressedStreamMultiplier; } ProfileBufferInfo; // // As per codec team, we will be assuming 50% compression over max YUV size for the newer codecs, 0% compression for older codecs. // ProfileBufferInfo VP9BufferInfo = { 3, { VIDEO_DECODE_BUFFER_TYPE_PICTURE_PARAMETERS, sizeof(DXVA_PicParams_VP9), VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL, sizeof(DXVA_Slice_VPx_Short), VIDEO_DECODE_BUFFER_TYPE_BITSTREAM, 0 }, // Assuming compresion ratio 1:1 for VP9 as we got bug reports for VP9 clips hitting corruption due to insufficient bitstream size { 1, 1, 0, 0 } }; ProfileBufferInfo VP8BufferInfo = { 3, { VIDEO_DECODE_BUFFER_TYPE_PICTURE_PARAMETERS, sizeof(DXVA_PicParams_VP8), VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL, sizeof(DXVA_Slice_VPx_Short), VIDEO_DECODE_BUFFER_TYPE_BITSTREAM, 0 }, { 1, 2, 0, 0 }, }; ProfileBufferInfo H264BufferInfo = { 4, { VIDEO_DECODE_BUFFER_TYPE_PICTURE_PARAMETERS, sizeof(DXVA_PicParams_H264), VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX, sizeof(DXVA_Qmatrix_H264), VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL, sizeof(DXVA_Slice_H264_Short), VIDEO_DECODE_BUFFER_TYPE_BITSTREAM, 0 }, { 1, 2, 1280, 720 }, }; ProfileBufferInfo H264MVCBufferInfo = { 4, { VIDEO_DECODE_BUFFER_TYPE_PICTURE_PARAMETERS, sizeof(DXVA_PicParams_H264_MVC), VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX, sizeof(DXVA_Qmatrix_H264), VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL, sizeof(DXVA_Slice_H264_Short), VIDEO_DECODE_BUFFER_TYPE_BITSTREAM, 0 }, { 1, 2, 
1280, 720 }, }; ProfileBufferInfo HEVCBufferInfo = { 4, { VIDEO_DECODE_BUFFER_TYPE_PICTURE_PARAMETERS, sizeof(DXVA_PicParams_HEVC), VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX, sizeof(DXVA_Qmatrix_HEVC), VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL, sizeof(DXVA_Slice_HEVC_Short), VIDEO_DECODE_BUFFER_TYPE_BITSTREAM, 0 }, { 1, 2, 1280, 720 }, }; ProfileBufferInfo MPEG2BufferInfo = { 4, { VIDEO_DECODE_BUFFER_TYPE_PICTURE_PARAMETERS, sizeof(DXVA_PictureParameters), VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX, sizeof(DXVA_QmatrixData), VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL, sizeof(DXVA_SliceInfo), VIDEO_DECODE_BUFFER_TYPE_BITSTREAM, 0 }, { 1, 1, 0, 0 }, }; ProfileBufferInfo VC1BufferInfo = { 4, { VIDEO_DECODE_BUFFER_TYPE_PICTURE_PARAMETERS, sizeof(DXVA_PictureParameters), VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX, sizeof(DXVA_QmatrixData), VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL, sizeof(DXVA_SliceInfo), VIDEO_DECODE_BUFFER_TYPE_BITSTREAM, 0 }, { 1, 1, 0, 0 }, }; ProfileBufferInfo MPEG4PT2BufferInfo = { 4, { VIDEO_DECODE_BUFFER_TYPE_PICTURE_PARAMETERS, sizeof(DXVA_PicParams_MPEG4_PART2), VIDEO_DECODE_BUFFER_TYPE_INVERSE_QUANTIZATION_MATRIX, sizeof(DXVA_QmatrixData), VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL, sizeof(DXVA_SliceInfo), VIDEO_DECODE_BUFFER_TYPE_BITSTREAM, 0 }, { 1, 1, 0, 0 }, }; // Profile Info // Each DecodeProfile must be unique // Each unique combination of DecodeProfileType and DecodeProfileBitDepth must map to exactly one DecodeProfile. 
struct ProfileInfo { GUID DecodeProfile; VIDEO_DECODE_PROFILE_TYPE DecodeProfileType; VIDEO_DECODE_PROFILE_BIT_DEPTH DecodeProfileBitDepth; ProfileBufferInfo BufferInfo; } AvailableProfiles[] = { // DecodeProfile DecodeProfileType DecodeProfileBitDepth BufferInfo { D3D12_VIDEO_DECODE_PROFILE_MPEG2, VIDEO_DECODE_PROFILE_TYPE_MPEG2, VIDEO_DECODE_PROFILE_BIT_DEPTH_8_BIT, MPEG2BufferInfo }, { D3D12_VIDEO_DECODE_PROFILE_MPEG1_AND_MPEG2, VIDEO_DECODE_PROFILE_TYPE_MPEG2, VIDEO_DECODE_PROFILE_BIT_DEPTH_8_BIT, MPEG2BufferInfo }, { D3D12_VIDEO_DECODE_PROFILE_H264, VIDEO_DECODE_PROFILE_TYPE_H264, VIDEO_DECODE_PROFILE_BIT_DEPTH_8_BIT, H264BufferInfo }, { D3D12_VIDEO_DECODE_PROFILE_H264_STEREO_PROGRESSIVE, VIDEO_DECODE_PROFILE_TYPE_H264, VIDEO_DECODE_PROFILE_BIT_DEPTH_8_BIT, H264BufferInfo }, { D3D12_VIDEO_DECODE_PROFILE_H264_STEREO, VIDEO_DECODE_PROFILE_TYPE_H264, VIDEO_DECODE_PROFILE_BIT_DEPTH_8_BIT, H264BufferInfo }, { D3D12_VIDEO_DECODE_PROFILE_H264_MULTIVIEW, VIDEO_DECODE_PROFILE_TYPE_H264_MVC, VIDEO_DECODE_PROFILE_BIT_DEPTH_8_BIT, H264MVCBufferInfo }, { D3D12_VIDEO_DECODE_PROFILE_VC1, VIDEO_DECODE_PROFILE_TYPE_VC1, VIDEO_DECODE_PROFILE_BIT_DEPTH_8_BIT, VC1BufferInfo }, { D3D12_VIDEO_DECODE_PROFILE_VC1_D2010, VIDEO_DECODE_PROFILE_TYPE_VC1, VIDEO_DECODE_PROFILE_BIT_DEPTH_8_BIT, VC1BufferInfo }, { D3D12_VIDEO_DECODE_PROFILE_MPEG4PT2_SIMPLE, VIDEO_DECODE_PROFILE_TYPE_MPEG4PT2, VIDEO_DECODE_PROFILE_BIT_DEPTH_8_BIT, MPEG4PT2BufferInfo }, { D3D12_VIDEO_DECODE_PROFILE_MPEG4PT2_ADVSIMPLE_NOGMC, VIDEO_DECODE_PROFILE_TYPE_MPEG4PT2, VIDEO_DECODE_PROFILE_BIT_DEPTH_8_BIT, MPEG4PT2BufferInfo }, { D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN, VIDEO_DECODE_PROFILE_TYPE_HEVC, VIDEO_DECODE_PROFILE_BIT_DEPTH_8_BIT, HEVCBufferInfo }, { D3D12_VIDEO_DECODE_PROFILE_HEVC_MAIN10, VIDEO_DECODE_PROFILE_TYPE_HEVC, VIDEO_DECODE_PROFILE_BIT_DEPTH_10_BIT, HEVCBufferInfo }, { D3D12_VIDEO_DECODE_PROFILE_VP9, VIDEO_DECODE_PROFILE_TYPE_VP9, VIDEO_DECODE_PROFILE_BIT_DEPTH_8_BIT, VP9BufferInfo }, { 
D3D12_VIDEO_DECODE_PROFILE_VP9_10BIT_PROFILE2, VIDEO_DECODE_PROFILE_TYPE_VP9, VIDEO_DECODE_PROFILE_BIT_DEPTH_10_BIT, VP9BufferInfo }, { D3D12_VIDEO_DECODE_PROFILE_VP8, VIDEO_DECODE_PROFILE_TYPE_VP8, VIDEO_DECODE_PROFILE_BIT_DEPTH_8_BIT, VP8BufferInfo }, }; //---------------------------------------------------------------------------------------------------------------------------------- VideoDecoder::VideoDecoder( ImmediateContext *pContext, ID3D12VideoDevice* pVideoDeviceNoRef, const D3D12_VIDEO_DECODER_DESC& desc ) : DeviceChildImpl(pContext) { ThrowFailure(pVideoDeviceNoRef->CreateVideoDecoder(&desc, IID_PPV_ARGS(GetForCreate()))); } //---------------------------------------------------------------------------------------------------------------------------------- VideoDecoderHeap::VideoDecoderHeap( ImmediateContext *pContext, ID3D12VideoDevice* pVideoDeviceNoRef, const D3D12_VIDEO_DECODER_HEAP_DESC& desc ) : DeviceChildImpl(pContext) { ThrowFailure(pVideoDeviceNoRef->CreateVideoDecoderHeap(&desc, IID_PPV_ARGS(GetForCreate()))); } //---------------------------------------------------------------------------------------------------------------------------------- VideoDecode::VideoDecode(_In_ ImmediateContext *pDevice, VideoDecodeCreationArgs const& args) : DeviceChild(pDevice) , m_decodingStatus(pDevice) , m_profileType(GetProfileType(args.Desc.DecodeProfile)) , m_decodeFormat(args.Desc.DecodeFormat) { if (!m_pParent->m_pDevice12_1) { ThrowFailure(E_NOINTERFACE); } ThrowFailure(m_pParent->m_pDevice12_1->QueryInterface(&m_spVideoDevice)); D3D12_VIDEO_DECODE_CONFIGURATION decodeConfiguration = { args.Desc.DecodeProfile, D3D12_BITSTREAM_ENCRYPTION_TYPE_NONE, args.Config.InterlaceType }; D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport = {}; decodeSupport.NodeIndex = m_pParent->GetNodeIndex(); decodeSupport.Configuration = decodeConfiguration; decodeSupport.Width = args.Desc.Width; decodeSupport.Height = args.Desc.Height; decodeSupport.DecodeFormat = 
args.Desc.DecodeFormat; // no info from DX11 on framerate/bitrate decodeSupport.FrameRate.Numerator = 0; decodeSupport.FrameRate.Denominator = 0; decodeSupport.BitRate = 0; ThrowFailure(m_spVideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_DECODE_SUPPORT, &decodeSupport, sizeof(decodeSupport))); if (!(decodeSupport.SupportFlags & D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED)) { ThrowFailure(E_INVALIDARG); } m_configurationFlags = decodeSupport.ConfigurationFlags; m_tier = decodeSupport.DecodeTier; m_decoderDesc.NodeMask = m_pParent->GetNodeMask(); m_decoderDesc.Configuration = decodeConfiguration; m_spVideoDecoder = std::make_unique(m_pParent, m_spVideoDevice.get(), m_decoderDesc); m_decoderHeapDesc.NodeMask = m_pParent->GetNodeMask(); m_decoderHeapDesc.Configuration = decodeConfiguration; m_decoderHeapDesc.DecodeWidth = args.Desc.Width; m_decoderHeapDesc.DecodeHeight = args.Desc.Height; m_decoderHeapDesc.Format = args.Desc.DecodeFormat; m_decoderHeapDesc.MaxDecodePictureBufferCount = 0; m_ConfigDecoderSpecific = args.Config.ConfigDecoderSpecific; VIDEO_DECODE_PROFILE_BIT_DEPTH bitDepth = GetProfileBitDepth(args.Desc.DecodeProfile); m_DecodeProfilePerBitDepth[GetIndex(bitDepth)] = args.Desc.DecodeProfile; } //---------------------------------------------------------------------------------------------------------------------------------- VideoDecode::~VideoDecode() noexcept { // Stop exception here, as destructor is noexcept try { m_pParent->Flush(COMMAND_LIST_TYPE_VIDEO_DECODE_MASK); // throws } catch (_com_error&) { // success = false; } catch (std::bad_alloc&) { // success = false; } } //---------------------------------------------------------------------------------------------------------------------------------- void VideoDecode::ManageResolutionChange(const VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS *pOutputArguments) { UINT width; UINT height; UINT16 maxDPB; ThrowFailure(GetDecodeFrameInfo(&width, &height, &maxDPB)); ID3D12Resource *pTextureArray = 
pOutputArguments->pOutputTexture2D->GetUnderlyingResource(); if (!pTextureArray) { ThrowFailure(E_INVALIDARG); } D3D12_RESOURCE_DESC outputResourceDesc = pTextureArray->GetDesc(); VIDEO_DECODE_PROFILE_BIT_DEPTH resourceBitDepth = GetFormatBitDepth(outputResourceDesc.Format); if (m_decodeFormat != outputResourceDesc.Format) { D3D12_VIDEO_DECODER_DESC decoderDesc = m_decoderDesc; decoderDesc.Configuration.DecodeProfile = GetDecodeProfile(m_profileType, resourceBitDepth); m_spVideoDecoder = std::make_unique(m_pParent, m_spVideoDevice.get(), decoderDesc); m_decoderDesc = decoderDesc; } if ( !m_spCurrentDecoderHeap || m_decodeFormat != outputResourceDesc.Format || m_decoderHeapDesc.DecodeWidth != width || m_decoderHeapDesc.DecodeHeight != height || m_decoderHeapDesc.MaxDecodePictureBufferCount < maxDPB) { UINT16 referenceCount = maxDPB; bool fArrayOfTexture = false; bool fReferenceOnly = (m_configurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATIONS_REQUIRED) != 0; ReferenceOnlyDesc* pReferenceOnlyDesc = nullptr; ReferenceOnlyDesc referenceOnlyDesc; referenceOnlyDesc.Width = outputResourceDesc.Width; referenceOnlyDesc.Height = outputResourceDesc.Height; referenceOnlyDesc.Format = outputResourceDesc.Format; if (pOutputArguments->ConversionArguments.Enable) { // Decode output conversion is on, create a DPB only array to hold the references. // All indices are re-mapped in host decoder to address just the size of the DPB array (given by ReferenceFrameCount). 
referenceCount = (UINT16)pOutputArguments->ConversionArguments.ReferenceFrameCount; referenceOnlyDesc.Width = pOutputArguments->ConversionArguments.ReferenceInfo.Width; referenceOnlyDesc.Height = pOutputArguments->ConversionArguments.ReferenceInfo.Height; referenceOnlyDesc.Format = pOutputArguments->ConversionArguments.ReferenceInfo.Format.Format; pReferenceOnlyDesc = &referenceOnlyDesc; } else if (fReferenceOnly) { pReferenceOnlyDesc = &referenceOnlyDesc; } if (outputResourceDesc.DepthOrArraySize != 1) { // When DepthOrArraySize is not 1 Enable Texture Array Mode. This selection // is made regardless of ConfigDecoderSpecific during decode creation. // The reference indices are in a range of zero to the ArraySize and refer // directly to array subresources. referenceCount = outputResourceDesc.DepthOrArraySize; } else { // A DepthOrArraySize of 1 indicates that Array of Texture Mode is enabled. // The reference indices are not in the range of 0 to MaxDPB, but instead // are in a range determined by the caller that the driver doesn't appear to have // a way of knowing. To optimize the reference only case, 11on12 must support // a level of indirection to map the callers indices into references. 
assert(m_tier >= D3D12_VIDEO_DECODE_TIER_2 || fReferenceOnly); fArrayOfTexture = m_tier >= D3D12_VIDEO_DECODE_TIER_2; } m_referenceDataManager.Resize(referenceCount, pReferenceOnlyDesc, fArrayOfTexture); // throw( bad alloc ) D3D12_VIDEO_DECODER_HEAP_DESC decoderHeapDesc = m_decoderHeapDesc; decoderHeapDesc.Configuration.DecodeProfile = GetDecodeProfile(m_profileType, resourceBitDepth); decoderHeapDesc.DecodeWidth = width; decoderHeapDesc.DecodeHeight = height; decoderHeapDesc.Format = outputResourceDesc.Format; decoderHeapDesc.MaxDecodePictureBufferCount = maxDPB; m_spCurrentDecoderHeap = std::make_shared(m_pParent, m_spVideoDevice.get(), decoderHeapDesc); m_decoderHeapDesc = decoderHeapDesc; } m_decodeFormat = outputResourceDesc.Format; } struct ETW_Pic_Entry { UINT8 Index7Bits; UINT8 AssociatedFlag; UINT8 bPicEntry; }; //---------------------------------------------------------------------------------------------------------------------------------- template static ETW_Pic_Entry LogCopyPicEntry(const T& src) { ETW_Pic_Entry dest; dest.Index7Bits = src.Index7Bits; dest.AssociatedFlag = src.AssociatedFlag; dest.bPicEntry = src.bPicEntry; return dest; } //---------------------------------------------------------------------------------------------------------------------------------- template static void LogCopyPicEntries(ETW_Pic_Entry (&dstPicEntries)[dstPicEntriesSize], T (&srcPicEntries)[srcPicEntriesSize], UINT16& copiedPicEntries) { static_assert(dstPicEntriesSize >= srcPicEntriesSize, "Dst must be large enough to hold all of src."); for (UINT16 i(0); i < srcPicEntriesSize; ++i) { dstPicEntries[i] = LogCopyPicEntry(srcPicEntries[i]); } copiedPicEntries = static_cast(srcPicEntriesSize); } //---------------------------------------------------------------------------------------------------------------------------------- void VideoDecode::LogPicParams() const { if ( g_hTracelogging && TraceLoggingProviderEnabled(g_hTracelogging, 0, 0)) { 
static_assert(sizeof(ETW_Pic_Entry) == 3); //const size_t MaxRefPicListLength = max(_countof(DXVA_PicParams_H264::RefFrameList), max(_countof(DXVA_PicParams_HEVC::RefPicList), _countof(DXVA_PicParams_VP9::ref_frame_map))); //constexpr size_t MaxRefPicListLength = max(std::extent::value, max(std::extent::value, std::extent::value)); const size_t MaxRefPicListLength = _countof(DXVA_PicParams_H264::RefFrameList) > _countof(DXVA_PicParams_H264_MVC::RefFrameList) ? _countof(DXVA_PicParams_H264::RefFrameList) : _countof(DXVA_PicParams_H264_MVC::RefFrameList) > _countof(DXVA_PicParams_HEVC::RefPicList) ? _countof(DXVA_PicParams_H264_MVC::RefFrameList) : _countof(DXVA_PicParams_HEVC::RefPicList) > _countof(DXVA_PicParams_VP9::ref_frame_map) ? _countof(DXVA_PicParams_HEVC::RefPicList) : _countof(DXVA_PicParams_VP9::ref_frame_map); ETW_Pic_Entry CurrPic = {}; ETW_Pic_Entry RefPicList[MaxRefPicListLength] = {}; UINT16 RefPicListLength = 0; switch (m_profileType) { case VIDEO_DECODE_PROFILE_TYPE_VP9: { // From the VP9 DXVA Spec: // If bPicEntry is not 0xFF, the entry may be used as a reference surface for decoding the current picture or // a subsequent picture in decoding order. All uncompressed surfaces that correspond to frames that may be used for reference // in the decoding process of the current picture or any subsequent picture shall be present in the ref_frame_map[] array auto pPicParams = GetPicParams(); LogCopyPicEntries(RefPicList, pPicParams->ref_frame_map, RefPicListLength); CurrPic = LogCopyPicEntry(pPicParams->CurrPic); } break; case VIDEO_DECODE_PROFILE_TYPE_VP8: { // From the VP8 DXVA Spec: // Specify the frame buffer/surface indices for the altref frame, the golden frame, and the previous reconstructed frame. // In this context, the AssociatedFlag has no meaning and shall be 0, and the accelerator shall ignore its value. // The VP8 decoder needs to maintain four YUV frame buffers/surfaces for decoding purposes. 
// These buffers hold the current frame being reconstructed, the previous reconstructed frame, the most recent golden frame, // and the most recent altref frame auto pPicParams = GetPicParams(); RefPicListLength = 3; RefPicList[0] = LogCopyPicEntry(pPicParams->alt_fb_idx); RefPicList[1] = LogCopyPicEntry(pPicParams->gld_fb_idx); RefPicList[2] = LogCopyPicEntry(pPicParams->lst_fb_idx); CurrPic = LogCopyPicEntry(pPicParams->CurrPic); } break; case VIDEO_DECODE_PROFILE_TYPE_HEVC: { // From the H265 DXVA Spec: // Index7Bits // An index that identifies an uncompressed surface for the CurrPic or RefPicList member of the picture parameters structure(section 4.0). // When Index7Bits is used in the CurrPic and RefPicList members of the picture parameters structure, the value directly specifies the DXVA index of an uncompressed surface. // When Index7Bits is 127 (0x7F), this indicates that it does not contain a valid index. auto pPicParams = GetPicParams(); LogCopyPicEntries(RefPicList, pPicParams->RefPicList, RefPicListLength); CurrPic = LogCopyPicEntry(pPicParams->CurrPic); } break; case VIDEO_DECODE_PROFILE_TYPE_H264: { // From H264 DXVA spec: // Index7Bits // An index that identifies an uncompressed surface for the CurrPic or RefFrameList member of the picture parameters structure(section 4.0) or the RefPicList member of the slice control data structure(section 6.0) // When Index7Bits is used in the CurrPic and RefFrameList members of the picture parameters structure, the value directly specifies the DXVA index of an uncompressed surface. // When Index7Bits is used in the RefPicList member of the slice control data structure, the value identifies the surface indirectly, as an index into the RefFrameList array of the associated picture parameters structure.For more information, see section 6.2. // In all cases, when Index7Bits does not contain a valid index, the value is 127. 
auto pPicParams = GetPicParams(); LogCopyPicEntries(RefPicList, pPicParams->RefFrameList, RefPicListLength); CurrPic = LogCopyPicEntry(pPicParams->CurrPic); } break; case VIDEO_DECODE_PROFILE_TYPE_H264_MVC: { // From H264 DXVA spec: // Index7Bits // An index that identifies an uncompressed surface for the CurrPic or RefFrameList member of the picture parameters structure(section 4.0) or the RefPicList member of the slice control data structure(section 6.0) // When Index7Bits is used in the CurrPic and RefFrameList members of the picture parameters structure, the value directly specifies the DXVA index of an uncompressed surface. // When Index7Bits is used in the RefPicList member of the slice control data structure, the value identifies the surface indirectly, as an index into the RefFrameList array of the associated picture parameters structure.For more information, see section 6.2. // In all cases, when Index7Bits does not contain a valid index, the value is 127. auto pPicParams = GetPicParams(); LogCopyPicEntries(RefPicList, pPicParams->RefFrameList, RefPicListLength); CurrPic = LogCopyPicEntry(pPicParams->CurrPic); } break; case VIDEO_DECODE_PROFILE_TYPE_VC1: case VIDEO_DECODE_PROFILE_TYPE_MPEG2: { auto pPicParams = GetPicParams(); RefPicListLength = 2; RefPicList[0].Index7Bits = static_cast(pPicParams->wForwardRefPictureIndex); RefPicList[1].Index7Bits = static_cast(pPicParams->wBackwardRefPictureIndex); CurrPic.Index7Bits = static_cast(pPicParams->wDecodedPictureIndex); } break; case VIDEO_DECODE_PROFILE_TYPE_MPEG4PT2: { auto pPicParams = GetPicParams(); RefPicListLength = 2; RefPicList[0].Index7Bits = static_cast(pPicParams->wForwardRefPictureIndex); RefPicList[1].Index7Bits = static_cast(pPicParams->wBackwardRefPictureIndex); CurrPic.Index7Bits = static_cast(pPicParams->wDecodedPictureIndex); } break; default: ThrowFailure(E_NOTIMPL); break; } TraceLoggingWrite(g_hTracelogging, "DecodePictureLists", 
TraceLoggingPointer(m_spVideoDecoder->GetForImmediateUse(), "pID3D12Decoder"), TraceLoggingStruct(3, "CurrPic"), TraceLoggingUInt8(CurrPic.Index7Bits, "Index7Bits"), TraceLoggingUInt8(CurrPic.AssociatedFlag, "AssociatedFlag"), TraceLoggingUInt8(CurrPic.bPicEntry, "bPicEntry"), TraceLoggingPackedData(&RefPicListLength, sizeof(RefPicListLength)), // Data for the array count TraceLoggingPackedData(RefPicList, sizeof(RefPicList)), // Data for the array content TraceLoggingPackedStructArray(3, "RefPicList"), // Structure metadata TraceLoggingPackedMetadata(TlgInUINT8, "Index7Bits"), TraceLoggingPackedMetadata(TlgInUINT8, "AssociatedFlag"), TraceLoggingPackedMetadata(TlgInUINT8, "bPicEntry")); } } //---------------------------------------------------------------------------------------------------------------------------------- void VideoDecode::ReleaseUnusedReferences() { // Method overview // 1. Clear the following m_referenceDataManager descriptors: textures, textureSubresources and decoder heap by calling m_referenceDataManager.ResetReferenceFramesInformation() // 2. Codec specific strategy in switch statement regarding reference frames eviction policy // 3. Call m_referenceDataManager.ReleaseUnusedReferences(); at the end of this method. Any references (and texture allocations associated) that were left not marked as used in m_referenceDataManager by step (2) are lost. 
m_referenceDataManager.ResetReferenceFramesInformation(); switch (m_profileType) { case VIDEO_DECODE_PROFILE_TYPE_VP9: { // References residency policy: Mark all references as unused and only mark again as used the ones used by this frame m_referenceDataManager.ResetInternalTrackingReferenceUsage(); m_referenceDataManager.MarkReferencesInUse(GetPicParams()->ref_frame_map); } break; case VIDEO_DECODE_PROFILE_TYPE_VP8: { // References residency policy: Mark all references as unused and only mark again as used the ones used by this frame m_referenceDataManager.ResetInternalTrackingReferenceUsage(); auto pPicParams = GetPicParams(); m_referenceDataManager.MarkReferenceInUse(pPicParams->alt_fb_idx.Index7Bits); m_referenceDataManager.MarkReferenceInUse(pPicParams->gld_fb_idx.Index7Bits); m_referenceDataManager.MarkReferenceInUse(pPicParams->lst_fb_idx.Index7Bits); } break; case VIDEO_DECODE_PROFILE_TYPE_HEVC: { // References residency policy: Mark all references as unused and only mark again as used the ones used by this frame m_referenceDataManager.ResetInternalTrackingReferenceUsage(); m_referenceDataManager.MarkReferencesInUse(GetPicParams()->RefPicList); } break; case VIDEO_DECODE_PROFILE_TYPE_H264: { // References residency policy: Mark all references as unused and only mark again as used the ones used by this frame m_referenceDataManager.ResetInternalTrackingReferenceUsage(); m_referenceDataManager.MarkReferencesInUse(GetPicParams()->RefFrameList); } break; case VIDEO_DECODE_PROFILE_TYPE_H264_MVC: { // References residency policy: Mark all references as unused and only mark again as used the ones used by this frame m_referenceDataManager.ResetInternalTrackingReferenceUsage(); m_referenceDataManager.MarkReferencesInUse(GetPicParams()->RefFrameList); } break; case VIDEO_DECODE_PROFILE_TYPE_VC1: case VIDEO_DECODE_PROFILE_TYPE_MPEG2: { auto pPicParams = GetPicParams(); // If the current frame uses no references, don't evict the current active references as future 
frames might use them as references again if ((pPicParams->wForwardRefPictureIndex != DXVA_INVALID_PICTURE_INDEX) || (pPicParams->wBackwardRefPictureIndex != DXVA_INVALID_PICTURE_INDEX)) { // References residency policy for frames that use at least one reference: Mark all references as unused and only mark again as used the ones used by this frame m_referenceDataManager.ResetInternalTrackingReferenceUsage(); } m_referenceDataManager.MarkReferenceInUse(pPicParams->wForwardRefPictureIndex); m_referenceDataManager.MarkReferenceInUse(pPicParams->wBackwardRefPictureIndex); } break; case VIDEO_DECODE_PROFILE_TYPE_MPEG4PT2: { // References residency policy: Mark all references as unused and only mark again as used the ones used by this frame m_referenceDataManager.ResetInternalTrackingReferenceUsage(); auto pPicParams = GetPicParams(); m_referenceDataManager.MarkReferenceInUse(pPicParams->wForwardRefPictureIndex); m_referenceDataManager.MarkReferenceInUse(pPicParams->wBackwardRefPictureIndex); } break; default: ThrowFailure(E_NOTIMPL); break; } // Releases the underlying reference picture texture objects of all references that were not marked as used in this method. 
m_referenceDataManager.ReleaseUnusedReferences(); } //---------------------------------------------------------------------------------------------------------------------------------- _Use_decl_annotations_ void VideoDecode::PrepareForDecodeFrame(const VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *pInputArguments, const VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS *pOutputArguments) { UNREFERENCED_PARAMETER(pInputArguments); if (!pOutputArguments->pOutputTexture2D) { ThrowFailure(E_INVALIDARG); } LogPicParams(); ReleaseUnusedReferences(); { ManageResolutionChange(pOutputArguments); UpdateCurrPic( pOutputArguments->pOutputTexture2D, pOutputArguments->SubresourceSubset.MinSubresource()); } switch (m_profileType) { case VIDEO_DECODE_PROFILE_TYPE_VP9: { // From the VP9 DXVA Spec: // If bPicEntry is not 0xFF, the entry may be used as a reference surface for decoding the current picture or // a subsequent picture in decoding order. All uncompressed surfaces that correspond to frames that may be used for reference // in the decoding process of the current picture or any subsequent picture shall be present in the ref_frame_map[] array m_referenceDataManager.UpdateEntries(GetPicParams()->ref_frame_map); // frame_refs lists the references used for the current decode operation m_referenceDataManager.GetUpdatedEntries(GetPicParams()->frame_refs); } break; case VIDEO_DECODE_PROFILE_TYPE_VP8: { // From the VP8 DXVA Spec: // Specify the frame buffer/surface indices for the altref frame, the golden frame, and the previous reconstructed frame. // In this context, the AssociatedFlag has no meaning and shall be 0, and the accelerator shall ignore its value. // The VP8 decoder needs to maintain four YUV frame buffers/surfaces for decoding purposes. 
// These buffers hold the current frame being reconstructed, the previous reconstructed frame, the most recent golden frame, // and the most recent altref frame auto pPicParams = GetPicParams(); pPicParams->alt_fb_idx.Index7Bits = m_referenceDataManager.UpdateEntry(pPicParams->alt_fb_idx.Index7Bits); pPicParams->gld_fb_idx.Index7Bits = m_referenceDataManager.UpdateEntry(pPicParams->gld_fb_idx.Index7Bits); pPicParams->lst_fb_idx.Index7Bits = m_referenceDataManager.UpdateEntry(pPicParams->lst_fb_idx.Index7Bits); } break; case VIDEO_DECODE_PROFILE_TYPE_HEVC: { // From the H265 DXVA Spec: // Index7Bits // An index that identifies an uncompressed surface for the CurrPic or RefPicList member of the picture parameters structure(section 4.0). // When Index7Bits is used in the CurrPic and RefPicList members of the picture parameters structure, the value directly specifies the DXVA index of an uncompressed surface. // When Index7Bits is 127 (0x7F), this indicates that it does not contain a valid index. m_referenceDataManager.UpdateEntries(GetPicParams()->RefPicList); } break; case VIDEO_DECODE_PROFILE_TYPE_H264: { // From H264 DXVA spec: // Index7Bits // An index that identifies an uncompressed surface for the CurrPic or RefFrameList member of the picture parameters structure(section 4.0) or the RefPicList member of the slice control data structure(section 6.0) // When Index7Bits is used in the CurrPic and RefFrameList members of the picture parameters structure, the value directly specifies the DXVA index of an uncompressed surface. // When Index7Bits is used in the RefPicList member of the slice control data structure, the value identifies the surface indirectly, as an index into the RefFrameList array of the associated picture parameters structure.For more information, see section 6.2. // In all cases, when Index7Bits does not contain a valid index, the value is 127. 
m_referenceDataManager.UpdateEntries(GetPicParams()->RefFrameList); } break; case VIDEO_DECODE_PROFILE_TYPE_H264_MVC: { // From H264 DXVA spec: // Index7Bits // An index that identifies an uncompressed surface for the CurrPic or RefFrameList member of the picture parameters structure(section 4.0) or the RefPicList member of the slice control data structure(section 6.0) // When Index7Bits is used in the CurrPic and RefFrameList members of the picture parameters structure, the value directly specifies the DXVA index of an uncompressed surface. // When Index7Bits is used in the RefPicList member of the slice control data structure, the value identifies the surface indirectly, as an index into the RefFrameList array of the associated picture parameters structure.For more information, see section 6.2. // In all cases, when Index7Bits does not contain a valid index, the value is 127. m_referenceDataManager.UpdateEntries(GetPicParams()->RefFrameList); } break; case VIDEO_DECODE_PROFILE_TYPE_VC1: case VIDEO_DECODE_PROFILE_TYPE_MPEG2: { auto pPicParams = GetPicParams(); constexpr UINT VC1_PICDEBLOCKED_DEBLOCKING_BIT = 2; constexpr UINT VC1_PICDEBLOCKED_DERINGING_BIT = 3; constexpr UINT VC1_PICDEBLOCKED_REDUCED_DYNAMIC_RANGE_BIT = 5; constexpr UINT postProcessingOptions = (1 << VC1_PICDEBLOCKED_DEBLOCKING_BIT) | (1 << VC1_PICDEBLOCKED_DERINGING_BIT) | (1 << VC1_PICDEBLOCKED_REDUCED_DYNAMIC_RANGE_BIT); // No post-processing allowed, so we need to modify picture parameters for certain profiles where it can be turned on pPicParams->wDeblockedPictureIndex = DXVA_INVALID_PICTURE_INDEX; pPicParams->bPicDeblocked &= ~postProcessingOptions; pPicParams->bPicOBMC = 0; pPicParams->bPicBinPB = 0; pPicParams->wForwardRefPictureIndex = m_referenceDataManager.UpdateEntry(pPicParams->wForwardRefPictureIndex); pPicParams->wBackwardRefPictureIndex = m_referenceDataManager.UpdateEntry(pPicParams->wBackwardRefPictureIndex); } break; case VIDEO_DECODE_PROFILE_TYPE_MPEG4PT2: { auto pPicParams = 
GetPicParams(); // No post-processing allowed, so we need to modify picture parameters for certain profiles where it can be turned on pPicParams->wDeblockedPictureIndex = DXVA_INVALID_PICTURE_INDEX; pPicParams->unPicPostProc = 0; pPicParams->wForwardRefPictureIndex = m_referenceDataManager.UpdateEntry(pPicParams->wForwardRefPictureIndex); pPicParams->wBackwardRefPictureIndex = m_referenceDataManager.UpdateEntry(pPicParams->wBackwardRefPictureIndex); } break; default: ThrowFailure(E_NOTIMPL); break; } } //---------------------------------------------------------------------------------------------------------------------------------- _Use_decl_annotations_ void VideoDecode::DecodeFrame(const VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *pInputArguments, const VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS *pOutputArguments) { CachePicParams(pInputArguments); // translate input D3D12 structure D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS d3d12InputArguments = {}; d3d12InputArguments.CompressedBitstream.pBuffer = pInputArguments->CompressedBitstream.pBuffer->GetUnderlyingResource(); d3d12InputArguments.CompressedBitstream.Offset = pInputArguments->CompressedBitstream.Offset + pInputArguments->CompressedBitstream.pBuffer->GetSubresourcePlacement(0).Offset; d3d12InputArguments.CompressedBitstream.Size = pInputArguments->CompressedBitstream.Size; m_pParent->GetResourceStateManager().TransitionResource(pInputArguments->CompressedBitstream.pBuffer, D3D12_RESOURCE_STATE_VIDEO_DECODE_READ, COMMAND_LIST_TYPE::VIDEO_DECODE); PrepareForDecodeFrame(pInputArguments, pOutputArguments); d3d12InputArguments.NumFrameArguments = pInputArguments->FrameArgumentsCount; for (UINT i = 0; i < d3d12InputArguments.NumFrameArguments; i++) { D3D12_VIDEO_DECODE_FRAME_ARGUMENT& frameArgument = d3d12InputArguments.FrameArguments[i]; frameArgument = pInputArguments->FrameArguments[i]; if (frameArgument.Type == D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS) { assert(frameArgument.Size == 
m_modifiablePicParamsAllocationSize); frameArgument.pData = GetPicParams(); } } d3d12InputArguments.ReferenceFrames.ppTexture2Ds = m_referenceDataManager.textures.data(); d3d12InputArguments.ReferenceFrames.pSubresources = m_referenceDataManager.texturesSubresources.data(); d3d12InputArguments.ReferenceFrames.NumTexture2Ds = static_cast(m_referenceDataManager.Size()); d3d12InputArguments.pHeap = m_spCurrentDecoderHeap->GetForUse(COMMAND_LIST_TYPE::VIDEO_DECODE); // translate output D3D12 structure D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS1 d3d12OutputArguments = {}; d3d12OutputArguments.pOutputTexture2D = pOutputArguments->pOutputTexture2D->GetUnderlyingResource(); d3d12OutputArguments.OutputSubresource = pOutputArguments->SubresourceSubset.MinSubresource(); if (m_referenceDataManager.IsReferenceOnly()) { d3d12OutputArguments.ConversionArguments.Enable = TRUE; m_referenceDataManager.TransitionReferenceOnlyOutput(d3d12OutputArguments.ConversionArguments.pReferenceTexture2D, d3d12OutputArguments.ConversionArguments.ReferenceSubresource); const D3D12_RESOURCE_DESC &descReference = d3d12OutputArguments.ConversionArguments.pReferenceTexture2D->GetDesc(); d3d12OutputArguments.ConversionArguments.DecodeColorSpace = CDXGIColorSpaceHelper::ConvertFromLegacyColorSpace(!CD3D11FormatHelper::YUV(descReference.Format), CD3D11FormatHelper::GetBitsPerUnit(descReference.Format), /* StudioRGB= */ false, /* P709= */ true, /* StudioYUV= */ true); const D3D12_RESOURCE_DESC &descOutput = d3d12OutputArguments.pOutputTexture2D->GetDesc(); d3d12OutputArguments.ConversionArguments.OutputColorSpace = CDXGIColorSpaceHelper::ConvertFromLegacyColorSpace(!CD3D11FormatHelper::YUV(descOutput.Format), CD3D11FormatHelper::GetBitsPerUnit(descOutput.Format), /* StudioRGB= */ false, /* P709= */ true, /* StudioYUV= */ true); const D3D12_VIDEO_DECODER_HEAP_DESC& HeapDesc = m_spCurrentDecoderHeap->GetDesc(); d3d12OutputArguments.ConversionArguments.OutputWidth = HeapDesc.DecodeWidth; 
d3d12OutputArguments.ConversionArguments.OutputHeight = HeapDesc.DecodeHeight; } else { d3d12OutputArguments.ConversionArguments.Enable = FALSE; } m_pParent->GetResourceStateManager().TransitionSubresources(pOutputArguments->pOutputTexture2D, pOutputArguments->SubresourceSubset, D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, COMMAND_LIST_TYPE::VIDEO_DECODE); static_assert(_countof(pOutputArguments->Histograms) == _countof(d3d12OutputArguments.Histograms), "Must keep histogram component count in sync"); for (UINT i = 0; i < _countof(d3d12OutputArguments.Histograms); i++) { if (pOutputArguments->Histograms[i].pBuffer) { d3d12OutputArguments.Histograms[i].pBuffer = pOutputArguments->Histograms[i].pBuffer->GetUnderlyingResource(); d3d12OutputArguments.Histograms[i].Offset = pOutputArguments->Histograms[i].Offset + pOutputArguments->Histograms[i].pBuffer->GetSubresourcePlacement(0).Offset; m_pParent->GetResourceStateManager().TransitionResource(pOutputArguments->Histograms[i].pBuffer, D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, COMMAND_LIST_TYPE::VIDEO_DECODE); } else { d3d12OutputArguments.Histograms[i].pBuffer = nullptr; d3d12OutputArguments.Histograms[i].Offset = 0; } } // submit DecodeFrame // decode barrier for the output buffer m_pParent->GetResourceStateManager().ApplyAllResourceTransitions(); m_pParent->GetVideoDecodeCommandList()->DecodeFrame1( m_spVideoDecoder->GetForUse(COMMAND_LIST_TYPE::VIDEO_DECODE), &d3d12OutputArguments, &d3d12InputArguments); UINT statusReportFeedbackNumber; DXVA_PicEntry CurrPic; UCHAR field_pic_flag; GetStatusReportFeedbackNumber(/*_Out_*/statusReportFeedbackNumber, /*_Out_*/CurrPic, /*_Out_*/field_pic_flag); // throw( _com_error ) m_decodingStatus.EndQuery(statusReportFeedbackNumber, CurrPic, field_pic_flag); // throw( _com_error ) if (g_hTracelogging) { TraceLoggingWrite(g_hTracelogging, "Decode - StatusReportFeedbackNumber", TraceLoggingPointer(m_spVideoDecoder->GetForImmediateUse(), "pID3D12Decoder"), 
TraceLoggingValue(statusReportFeedbackNumber, "statusReportFeedbackNumber")); } m_pParent->SubmitCommandList(COMMAND_LIST_TYPE::VIDEO_DECODE); // throws } //---------------------------------------------------------------------------------------------------------------------------------- _Use_decl_annotations_ void VideoDecode::CachePicParams(const VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *pInputArguments) { UINT i = 0; for (; i < pInputArguments->FrameArgumentsCount; i++) { const D3D12_VIDEO_DECODE_FRAME_ARGUMENT& frameArgument = pInputArguments->FrameArguments[i]; if (frameArgument.Type == D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS) { break; } } if (i >= pInputArguments->FrameArgumentsCount) { // No pic params. ThrowFailure(E_INVALIDARG); } const D3D12_VIDEO_DECODE_FRAME_ARGUMENT& frameArgument = pInputArguments->FrameArguments[i]; if ( frameArgument.pData == nullptr || frameArgument.Size == 0) { // Invalid pic params. ThrowFailure(E_INVALIDARG); } if (m_modifiablePicParamsAllocationSize < frameArgument.Size) { m_modifiablePicParams.reset(new char[frameArgument.Size]); m_modifiablePicParamsAllocationSize = frameArgument.Size; } memcpy(m_modifiablePicParams.get(), frameArgument.pData, frameArgument.Size); } //---------------------------------------------------------------------------------------------------------------------------------- static inline int LengthFromMinCb(int length, int cbsize) { return length * (1 << cbsize); } //---------------------------------------------------------------------------------------------------------------------------------- static inline bool IsAdvancedProfile(DXVA_PictureParameters *pPicParams) { return (((pPicParams->bBidirectionalAveragingMode >> 3) & 1) != 0); } //---------------------------------------------------------------------------------------------------------------------------------- _Use_decl_annotations_ HRESULT VideoDecode::GetDecodeFrameInfo(UINT *pWidth, UINT *pHeight, UINT16 *pMaxDPB) noexcept { HRESULT hr 
= S_OK; *pWidth = 0; *pHeight = 0; *pMaxDPB = 0; switch (m_profileType) { case VIDEO_DECODE_PROFILE_TYPE_VC1: { auto pPicParams = GetPicParams(); if (IsAdvancedProfile(pPicParams)) { *pWidth = pPicParams->wPicWidthInMBminus1 + 1; *pHeight = pPicParams->wPicHeightInMBminus1 + 1; } else { *pWidth = (pPicParams->wPicWidthInMBminus1 + 1) * (pPicParams->bMacroblockWidthMinus1 + 1); *pHeight = (pPicParams->wPicHeightInMBminus1 + 1) * (pPicParams->bMacroblockHeightMinus1 + 1); } *pMaxDPB = 2 + 1; } break; case VIDEO_DECODE_PROFILE_TYPE_MPEG2: { auto pPicParams = GetPicParams(); if (IsAdvancedProfile(pPicParams)) { *pWidth = pPicParams->wPicWidthInMBminus1 + 1; *pHeight = pPicParams->wPicHeightInMBminus1 + 1; } else { *pWidth = (pPicParams->wPicWidthInMBminus1 + 1) * (pPicParams->bMacroblockWidthMinus1 + 1); *pHeight = (pPicParams->wPicHeightInMBminus1 + 1) * (pPicParams->bMacroblockHeightMinus1 + 1); } *pMaxDPB = 2 + 1; // Code below adjusts pHeight if necessary for interlaced video // These constants below correspond to picture_structure parameter of the MPEG2 spec. // bPicStructure // Indicates whether the current picture is a top - field picture(a value 1), a bottom - field picture(a value 2), or a frame picture(a value 3). // In progressive - scan frame - structured coding such as in H.261, bPicStructure is 3. // A derived value PicCurrentField is defined as zero unless bPicStructure is 2 (bottom field).In which case, it is 1. // This member has the same meaning as the picture_structure variable defined in Section 6.3.10 and Table 6 - 14 of MPEG - 2 (H.262). 
[[maybe_unused]] constexpr BYTE TOP_FIELD = 1; [[maybe_unused]] constexpr BYTE BOTTOM_FIELD = 2; constexpr BYTE FRAME_PICTURE = 3; // sample field picture has half as many macroblocks as frame // but the display height used for the D3D12 decoder must be the full non-interlaced video height, not the interlaced sample height (half as the video display size) if (pPicParams->bPicStructure != FRAME_PICTURE) { *pHeight <<= 1; } } break; case VIDEO_DECODE_PROFILE_TYPE_MPEG4PT2: { auto pPicParams = GetPicParams(); *pWidth = pPicParams->vop_width; *pHeight = pPicParams->vop_height; *pMaxDPB = 2 + 1; } break; case VIDEO_DECODE_PROFILE_TYPE_H264: { auto pPicParams = GetPicParams(); // wFrameWidthInMbsMinus1 Width of the frame containing this picture, in units of macroblocks, minus 1. (The width in macroblocks is wFrameWidthInMbsMinus1 plus 1.) // wFrameHeightInMbsMinus1 Height of the frame containing this picture, in units of macroblocks, minus 1. // (The height in macroblocks is wFrameHeightInMbsMinus1 plus 1.) When the picture is a field, the height of the frame is // twice the height of the picture and is an integer multiple of 2 in units of macroblocks. *pWidth = (pPicParams->wFrameWidthInMbsMinus1 + 1) * 16; *pHeight = (pPicParams->wFrameHeightInMbsMinus1 + 1)/ (pPicParams->frame_mbs_only_flag ? 1 : 2); *pHeight = (2 - pPicParams->frame_mbs_only_flag) * *pHeight; *pHeight = *pHeight * 16; *pMaxDPB = pPicParams->num_ref_frames + 1; } break; case VIDEO_DECODE_PROFILE_TYPE_H264_MVC: { auto pPicParams = GetPicParams(); // wFrameWidthInMbsMinus1 Width of the frame containing this picture, in units of macroblocks, minus 1. (The width in macroblocks is wFrameWidthInMbsMinus1 plus 1.) // wFrameHeightInMbsMinus1 Height of the frame containing this picture, in units of macroblocks, minus 1. // (The height in macroblocks is wFrameHeightInMbsMinus1 plus 1.) 
When the picture is a field, the height of the frame is // twice the height of the picture and is an integer multiple of 2 in units of macroblocks. *pWidth = (pPicParams->wFrameWidthInMbsMinus1 + 1) * 16; *pHeight = (pPicParams->wFrameHeightInMbsMinus1 + 1)/ (pPicParams->frame_mbs_only_flag ? 1 : 2); *pHeight = (2 - pPicParams->frame_mbs_only_flag) * *pHeight; *pHeight = *pHeight * 16; *pMaxDPB = pPicParams->num_ref_frames + 1; } break; case VIDEO_DECODE_PROFILE_TYPE_HEVC: { auto pPicParams = GetPicParams(); UINT log2_min_luma_coding_block_size = pPicParams->log2_min_luma_coding_block_size_minus3 + 3; *pWidth = LengthFromMinCb(pPicParams->PicWidthInMinCbsY, log2_min_luma_coding_block_size); *pHeight = LengthFromMinCb(pPicParams->PicHeightInMinCbsY, log2_min_luma_coding_block_size); *pMaxDPB = pPicParams->sps_max_dec_pic_buffering_minus1 + 1; } break; case VIDEO_DECODE_PROFILE_TYPE_VP9: { auto pPicParams = GetPicParams(); *pWidth = pPicParams->width; *pHeight = pPicParams->height; *pMaxDPB = _countof(pPicParams->ref_frame_map) + 1; } break; case VIDEO_DECODE_PROFILE_TYPE_VP8: { auto pPicParams = GetPicParams(); *pWidth = pPicParams->width; *pHeight = pPicParams->height; *pMaxDPB = 3 + 1; } break; default: hr = E_INVALIDARG; break; } if (m_configurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_HEIGHT_ALIGNMENT_MULTIPLE_32_REQUIRED) { const UINT AlignmentMask = 31; *pHeight = (*pHeight + AlignmentMask) & ~AlignmentMask; } return hr; } //---------------------------------------------------------------------------------------------------------------------------------- _Use_decl_annotations_ void VideoDecode::UpdateCurrPic(Resource* pTexture2D, UINT subresourceIndex) { switch (m_profileType) { case VIDEO_DECODE_PROFILE_TYPE_VC1: case VIDEO_DECODE_PROFILE_TYPE_MPEG2: { auto pPicParams = GetPicParams(); pPicParams->wDecodedPictureIndex = m_referenceDataManager.StoreFutureReference( pPicParams->wDecodedPictureIndex, m_spCurrentDecoderHeap, pTexture2D, subresourceIndex); 
} break; case VIDEO_DECODE_PROFILE_TYPE_MPEG4PT2: { auto pPicParams = GetPicParams(); pPicParams->wDecodedPictureIndex = m_referenceDataManager.StoreFutureReference( pPicParams->wDecodedPictureIndex, m_spCurrentDecoderHeap, pTexture2D, subresourceIndex); } break; case VIDEO_DECODE_PROFILE_TYPE_H264: { auto pPicParams = GetPicParams(); pPicParams->CurrPic.Index7Bits = m_referenceDataManager.StoreFutureReference( pPicParams->CurrPic.Index7Bits, m_spCurrentDecoderHeap, pTexture2D, subresourceIndex); } break; case VIDEO_DECODE_PROFILE_TYPE_H264_MVC: { auto pPicParams = GetPicParams(); pPicParams->CurrPic.Index7Bits = m_referenceDataManager.StoreFutureReference( pPicParams->CurrPic.Index7Bits, m_spCurrentDecoderHeap, pTexture2D, subresourceIndex); } break; case VIDEO_DECODE_PROFILE_TYPE_HEVC: { auto pPicParams = GetPicParams(); pPicParams->CurrPic.Index7Bits = m_referenceDataManager.StoreFutureReference( pPicParams->CurrPic.Index7Bits, m_spCurrentDecoderHeap, pTexture2D, subresourceIndex); } break; case VIDEO_DECODE_PROFILE_TYPE_VP9: { auto pPicParams = GetPicParams(); pPicParams->CurrPic.Index7Bits = m_referenceDataManager.StoreFutureReference( pPicParams->CurrPic.Index7Bits, m_spCurrentDecoderHeap, pTexture2D, subresourceIndex); } break; case VIDEO_DECODE_PROFILE_TYPE_VP8: { auto pPicParams = GetPicParams(); pPicParams->CurrPic.Index7Bits = m_referenceDataManager.StoreFutureReference( pPicParams->CurrPic.Index7Bits, m_spCurrentDecoderHeap, pTexture2D, subresourceIndex); } break; default: ThrowFailure(E_UNEXPECTED); break; } } //---------------------------------------------------------------------------------------------------------------------------------- template static void CopyNewStylePicParams(UINT& statusReportFeedbackNumber, DXVA_PicEntry& CurrPic, void* pParams) { const T* pPicParams = static_cast(pParams); statusReportFeedbackNumber = pPicParams->StatusReportFeedbackNumber; CurrPic.Index7Bits = pPicParams->CurrPic.Index7Bits; CurrPic.AssociatedFlag = 
pPicParams->CurrPic.AssociatedFlag; CurrPic.bPicEntry = pPicParams->CurrPic.bPicEntry; } //---------------------------------------------------------------------------------------------------------------------------------- _Use_decl_annotations_ void VideoDecode::GetStatusReportFeedbackNumber(UINT& statusReportFeedbackNumber, DXVA_PicEntry& CurrPic, UCHAR& field_pic_flag) noexcept { void *pParams = GetPicParams(); if (!pParams) { return; } statusReportFeedbackNumber = 0; CurrPic.Index7Bits = 0; CurrPic.AssociatedFlag = 0; CurrPic.bPicEntry = 0; field_pic_flag = 0; switch (m_profileType) { case VIDEO_DECODE_PROFILE_TYPE_VC1: case VIDEO_DECODE_PROFILE_TYPE_MPEG2: { DXVA_PictureParameters *pPicParams = (DXVA_PictureParameters *)pParams; // From VC1 spec: StatusReportFeedbackNumber // Shall equal the value of(bPicScanFixed << 8) + bPicSanMethods in the picture parameters structure that the // host decoder sent in the Execute call for which the accelerator is reporting status information. statusReportFeedbackNumber = (pPicParams->bPicScanFixed << 8) + pPicParams->bPicScanMethod; CurrPic.Index7Bits = static_cast(pPicParams->wDecodedPictureIndex); } break; case VIDEO_DECODE_PROFILE_TYPE_MPEG4PT2: { DXVA_PicParams_MPEG4_PART2 *pPicParams = (DXVA_PicParams_MPEG4_PART2 *)pParams; statusReportFeedbackNumber = pPicParams->StatusReportFeedbackNumber; CurrPic.Index7Bits = static_cast(pPicParams->wDecodedPictureIndex); } break; case VIDEO_DECODE_PROFILE_TYPE_H264: CopyNewStylePicParams(statusReportFeedbackNumber, CurrPic, pParams); field_pic_flag = static_cast(pParams)->field_pic_flag; break; case VIDEO_DECODE_PROFILE_TYPE_H264_MVC: CopyNewStylePicParams(statusReportFeedbackNumber, CurrPic, pParams); field_pic_flag = static_cast(pParams)->field_pic_flag; break; case VIDEO_DECODE_PROFILE_TYPE_HEVC: CopyNewStylePicParams(statusReportFeedbackNumber, CurrPic, pParams); break; case VIDEO_DECODE_PROFILE_TYPE_VP9: CopyNewStylePicParams(statusReportFeedbackNumber, CurrPic, pParams); 
break; case VIDEO_DECODE_PROFILE_TYPE_VP8: CopyNewStylePicParams(statusReportFeedbackNumber, CurrPic, pParams); break; default: ThrowFailure(E_UNEXPECTED); break; } } //---------------------------------------------------------------------------------------------------------------------------------- static ProfileInfo* GetProfileInfo(_In_ REFGUID DecodeProfile) noexcept { for (auto& profile : AvailableProfiles) { if (DecodeProfile == profile.DecodeProfile) { return &profile; } } return nullptr; } //---------------------------------------------------------------------------------------------------------------------------------- _Use_decl_annotations_ VIDEO_DECODE_PROFILE_TYPE VideoDecode::GetProfileType(REFGUID DecodeProfile) noexcept { ProfileInfo* pProfileInfo = GetProfileInfo(DecodeProfile); return pProfileInfo ? pProfileInfo->DecodeProfileType : VIDEO_DECODE_PROFILE_TYPE_NONE; } //---------------------------------------------------------------------------------------------------------------------------------- _Use_decl_annotations_ VIDEO_DECODE_PROFILE_BIT_DEPTH VideoDecode::GetProfileBitDepth(REFGUID DecodeProfile) noexcept { ProfileInfo* pProfileInfo = GetProfileInfo(DecodeProfile); return pProfileInfo ? 
pProfileInfo->DecodeProfileBitDepth : VIDEO_DECODE_PROFILE_BIT_DEPTH_NONE; } //---------------------------------------------------------------------------------------------------------------------------------- VIDEO_DECODE_PROFILE_BIT_DEPTH VideoDecode::GetFormatBitDepth(DXGI_FORMAT Format) noexcept { switch (Format) { case DXGI_FORMAT_NV12: case DXGI_FORMAT_YUY2: case DXGI_FORMAT_AYUV: case DXGI_FORMAT_NV11: case DXGI_FORMAT_420_OPAQUE: return VIDEO_DECODE_PROFILE_BIT_DEPTH_8_BIT; case DXGI_FORMAT_P010: case DXGI_FORMAT_Y410: case DXGI_FORMAT_Y210: return VIDEO_DECODE_PROFILE_BIT_DEPTH_10_BIT; case DXGI_FORMAT_P016: case DXGI_FORMAT_Y416: case DXGI_FORMAT_Y216: return VIDEO_DECODE_PROFILE_BIT_DEPTH_16_BIT; } assert(false); return VIDEO_DECODE_PROFILE_BIT_DEPTH_NONE; } //---------------------------------------------------------------------------------------------------------------------------------- GUID VideoDecode::GetDecodeProfile(VIDEO_DECODE_PROFILE_TYPE ProfileType, VIDEO_DECODE_PROFILE_BIT_DEPTH BitDepth) noexcept { // Cache the profile initially chosen for a type and bit depth. // When Reuse decoder is enabled, select the same profile for a given bit depth // that was previously selected. // Codecs that have an 8bit and 10bit representation (HEVC and VP9) currently only have // for each bit depth. // Only codecs like H264 and VC1 have ambiguity here by having multiple 8bit profiles, so this // ensures that the profile doesn't change when the resolution does. 
VIDEO_DECODE_PROFILE_BIT_DEPTH_INDEX bitDepthIndex = GetIndex(BitDepth); if (!m_DecodeProfilePerBitDepth[bitDepthIndex].has_value()) { m_DecodeProfilePerBitDepth[bitDepthIndex] = GUID_NULL; for (auto& profile : AvailableProfiles) { if ( ProfileType == profile.DecodeProfileType && BitDepth == profile.DecodeProfileBitDepth) { m_DecodeProfilePerBitDepth[bitDepthIndex] = profile.DecodeProfile; break; } } } return m_DecodeProfilePerBitDepth[bitDepthIndex].value_or(GUID_NULL); } //---------------------------------------------------------------------------------------------------------------------------------- static ProfileBufferInfo *GetProfileBufferInfo(_In_ REFGUID DecodeProfile) noexcept { ProfileInfo* pProfileInfo = GetProfileInfo(DecodeProfile); return pProfileInfo ? &pProfileInfo->BufferInfo : nullptr; } //---------------------------------------------------------------------------------------------------------------------------------- _Use_decl_annotations_ HRESULT VideoDecode::GetVideoDecoderBufferTypeCount(const VIDEO_DECODE_DESC *pDesc, UINT *pBufferTypeCount) noexcept { HRESULT hr = S_OK; *pBufferTypeCount = 0; ProfileBufferInfo *pProfileBufferInfo = GetProfileBufferInfo(pDesc->DecodeProfile); if (pProfileBufferInfo) { *pBufferTypeCount = pProfileBufferInfo->BufferTypeCount; } else { hr = E_INVALIDARG; } return hr; } //---------------------------------------------------------------------------------------------------------------------------------- static bool IsXboxReuseDecoderProfileType(VIDEO_DECODE_PROFILE_TYPE ProfileType) { return ProfileType == VIDEO_DECODE_PROFILE_TYPE_H264 || ProfileType == VIDEO_DECODE_PROFILE_TYPE_H264_MVC || ProfileType == VIDEO_DECODE_PROFILE_TYPE_HEVC || ProfileType == VIDEO_DECODE_PROFILE_TYPE_VP9; } //---------------------------------------------------------------------------------------------------------------------------------- _Use_decl_annotations_ void VideoDecode::GetVideoDecoderBufferInfo(const VIDEO_DECODE_DESC *pDesc, 
UINT Index, VIDEO_DECODE_BUFFER_TYPE *pType, UINT *pSize, bool IsXbox) { *pSize = 0; ProfileInfo* pProfileInfo = GetProfileInfo(pDesc->DecodeProfile); if (pProfileInfo) { ProfileBufferInfo *pProfileBufferInfo = &pProfileInfo->BufferInfo; UINT Width = pDesc->Width ? ((pDesc->Width < MIN_WIDTH) ? MIN_WIDTH : pDesc->Width) : MAX_WIDTH; UINT Height = pDesc->Height ? ((pDesc->Height < MIN_HEIGHT) ? MIN_HEIGHT : pDesc->Height) : MAX_HEIGHT; Width = Align(Width, MIN_ALIGN); Height = Align(Height, MIN_ALIGN); if (Index >= pProfileBufferInfo->BufferTypeCount) { ThrowFailure(E_INVALIDARG); } *pType = pProfileBufferInfo->Data[Index].Type; *pSize = pProfileBufferInfo->Data[Index].BaseSize; switch (*pType) { case VIDEO_DECODE_BUFFER_TYPE_SLICE_CONTROL: { const UINT MacroblockMinSize = 8; static_assert(MIN_ALIGN % MacroblockMinSize == 0, "MIN_ALIGN must be divisible by MacroblockMinSize"); UINT SliceControlBlocks = 0; switch (pProfileInfo->DecodeProfileType) { case VIDEO_DECODE_PROFILE_TYPE_MPEG2: // worst case scenario for MPEG2 is one slice control per macroblock in the picture. SliceControlBlocks = (Width * Height) / MacroblockMinSize; break; case VIDEO_DECODE_PROFILE_TYPE_H264: // worst case scenario for H264 per tables A-1 for Level 6.2 (MaxMBPS = 16711680) and Table A-4 for SliceRate for level 6.2 (SliceRate = 24) // Taking a 4K resolution (3840*2160) @ 300 fps (allowed in level 6.2) // From spec ...satisfy the constraint that the number of slices in picture n is less than or equal to MaxMBPB * ( tr(n) - tr(n-1) ) / SliceRate // where MaxMBPS and SliceRate are the values specified in Tables A-1 and A-4... // tr(n) = tr(n-1) = 1 / FPSRate = 1/300 // MaxSliceNumber = 2319 SliceControlBlocks = 2319; break; case VIDEO_DECODE_PROFILE_TYPE_HEVC: // worst case scenario for HEVC is 600 slices per Table A.4 of the HEVC spec for profile 6.2. Setting to 1024 just in case. 
SliceControlBlocks = 1024; break; default: // assume worst case scenario is one slice control per row of macroblocks SliceControlBlocks = Height / MacroblockMinSize; break; } //*pSize *= SliceControlBlocks; HRESULT hr = UIntMult(*pSize, SliceControlBlocks, pSize); ThrowFailure(hr); } break; case VIDEO_DECODE_BUFFER_TYPE_BITSTREAM: { if ( IsXbox && IsXboxReuseDecoderProfileType(pProfileInfo->DecodeProfileType)) { // The width and height queried here are initial sizes for the decoder. // When the decoder supports REUSE_DECODER (xbox only), the resolution // can go up without recreating the decoder or the bitstream buffer, so // a larger size than might be calculated below. Xbox has found the following // Dimensions to be sufficient for that device regardless of content dimensions. switch(pProfileInfo->DecodeProfileType) { case VIDEO_DECODE_PROFILE_TYPE_H264: case VIDEO_DECODE_PROFILE_TYPE_H264_MVC: *pSize = 1800 * 1024; break; default: assert(pProfileInfo->DecodeProfileType == VIDEO_DECODE_PROFILE_TYPE_HEVC || pProfileInfo->DecodeProfileType == VIDEO_DECODE_PROFILE_TYPE_VP9); *pSize = 6912 * 1024; break; } } else { // *pSize = (Width * Height * 2 * pProfileBufferInfo->CompressedStreamMultiplier.Num) / pProfileBufferInfo->CompressedStreamMultiplier.Denom; // Smaller streams use a 1:1 compressed stream multiplier. Memory optimization is not as important at these smaller sizes. // Use a 1:1 ratio to calculate a minimum size against the MinWidth/MinHeight UINT minSize; ThrowFailure(UIntMult(pProfileBufferInfo->CompressedStreamMultiplier.MinWidth, pProfileBufferInfo->CompressedStreamMultiplier.MinHeight, &minSize)); ThrowFailure(UIntMult(minSize, 2, &minSize)); // Use the specified multipliers to calculate a size based on the stream width and height. 
UINT calcSize; ThrowFailure(UIntMult(Width, Height, &calcSize)); ThrowFailure(UIntMult(calcSize, 2 * pProfileBufferInfo->CompressedStreamMultiplier.Num, &calcSize)); calcSize /= pProfileBufferInfo->CompressedStreamMultiplier.Denom; // Take the larger of the two. *pSize = std::max(minSize, calcSize); } } break; } } } //---------------------------------------------------------------------------------------------------------------------------------- _Use_decl_annotations_ void VideoDecode::GetVideoDecoderSupport(ID3D12Device *pDevice12, UINT NodeIndex, const VIDEO_DECODE_DESC *pDesc, D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT &decodeSupport) { unique_comptr spVideoDevice; ThrowFailure(pDevice12->QueryInterface(&spVideoDevice)); decodeSupport.NodeIndex = NodeIndex; decodeSupport.Configuration.DecodeProfile = pDesc->DecodeProfile; decodeSupport.Configuration.BitstreamEncryption = D3D12_BITSTREAM_ENCRYPTION_TYPE_NONE; decodeSupport.Configuration.InterlaceType = D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE; decodeSupport.Width = pDesc->Width; decodeSupport.Height = pDesc->Height; decodeSupport.DecodeFormat = pDesc->DecodeFormat; decodeSupport.FrameRate.Numerator = 30; decodeSupport.FrameRate.Denominator = 1; decodeSupport.BitRate = 0; ThrowFailure(spVideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_DECODE_SUPPORT, &decodeSupport, sizeof(decodeSupport))); } //---------------------------------------------------------------------------------------------------------------------------------- static bool SupportsArrayOfTexture(const D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT& decodeSupport, VIDEO_DECODE_PROFILE_TYPE ProfileType) { return ( ( decodeSupport.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2 || (decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATIONS_REQUIRED) != 0) && ( ProfileType == VIDEO_DECODE_PROFILE_TYPE_H264 || ProfileType == VIDEO_DECODE_PROFILE_TYPE_H264_MVC || ProfileType == VIDEO_DECODE_PROFILE_TYPE_HEVC || ProfileType == 
VIDEO_DECODE_PROFILE_TYPE_VP9)); } //---------------------------------------------------------------------------------------------------------------------------------- _Use_decl_annotations_ void VideoDecode::GetVideoDecoderConfigCount(ID3D12Device *pDevice12, UINT NodeIndex, const VIDEO_DECODE_DESC *pDesc, UINT *pConfigCount) { UINT configCount = 0; D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport = {}; GetVideoDecoderSupport(pDevice12, NodeIndex, pDesc, decodeSupport); if (decodeSupport.SupportFlags & D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED) { configCount++; } *pConfigCount = configCount; } //---------------------------------------------------------------------------------------------------------------------------------- _Use_decl_annotations_ void VideoDecode::GetVideoDecoderConfig(ID3D12Device *pDevice12, UINT NodeIndex, const VIDEO_DECODE_DESC *pDesc, UINT configIndex, VIDEO_DECODE_CONFIG *pConfig, bool IsXbox) { pConfig->InterlaceType = D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE; pConfig->ConfigDecoderSpecific = 0; size_t supportedConfigIndex = 0; D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT decodeSupport = {}; GetVideoDecoderSupport(pDevice12, NodeIndex, pDesc, decodeSupport); if (decodeSupport.SupportFlags & D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED) { if (supportedConfigIndex == configIndex) // found supported matching configIndex { // REUSE_DECODER indicates that the decoder may be re-used in the event of a resolution change // and also a bit depth change (and therefore a Profile change). D3D12 only supports the former. // VP9 is disabled because the resolution may change on non-key frames which is tested. 
VIDEO_DECODE_PROFILE_TYPE ProfileType = GetProfileType(pDesc->DecodeProfile); if ( IsXbox && IsXboxReuseDecoderProfileType(ProfileType)) { pConfig->ConfigDecoderSpecific |= VIDEO_DECODE_CONFIG_SPECIFIC_REUSE_DECODER; } if (SupportsArrayOfTexture(decodeSupport, ProfileType)) { pConfig->ConfigDecoderSpecific |= VIDEO_DECODE_CONFIG_SPECIFIC_ARRAY_OF_TEXTURES; } if (decodeSupport.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_HEIGHT_ALIGNMENT_MULTIPLE_32_REQUIRED) { pConfig->ConfigDecoderSpecific |= VIDEO_DECODE_CONFIG_SPECIFIC_ALIGNMENT_HEIGHT; } return; } ++supportedConfigIndex; } ThrowFailure(E_INVALIDARG); } //---------------------------------------------------------------------------------------------------------------------------------- _Use_decl_annotations_ HRESULT VideoDecode::GetDecodingStatus(void* pData, UINT dataSize) noexcept { m_decodingStatus.ReadAvailableData(m_spVideoDecoder->GetForImmediateUse(), m_profileType, static_cast(pData), dataSize); // throw( _com_error ) return S_OK; } }; ================================================ FILE: src/VideoDecodeStatistics.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#include "pch.h"
#include <dxva.h> // DXVA_Status_* report structures. NOTE(review): the header name was missing in the reviewed copy - confirm.

namespace D3D12TranslationLayer
{
    const UINT16 DXVA_MIN_STATUS_REPORTS = 512; // The DXVA specs require a minimum of 512 entries in the driver for status reports

    // Translates a D3D12 decode status (used as the index) into the bStatus value written to the
    // DXVA status report structures.
    const UCHAR VideoDecodeStatusMap[] =
    {
        0, // D3D12_VIDEO_DECODE_STATUS_OK
        1, // D3D12_VIDEO_DECODE_STATUS_CONTINUE
        2, // D3D12_VIDEO_DECODE_STATUS_CONTINUE_SKIP_DISPLAY
        3, // D3D12_VIDEO_DECODE_STATUS_RESTART
        3, // D3D12_VIDEO_DECODE_STATUS_RATE_EXCEEDED
    };

    //----------------------------------------------------------------------------------------------------------------------------------
    // Bounds-checked lookup into VideoDecodeStatusMap. The status comes from the resolved query
    // buffer, so a value outside the table is reported as the table's last (most severe) entry
    // rather than reading past the end of the array.
    static UCHAR MapDecodeStatus(UINT64 status) noexcept
    {
        const UINT64 lastIndex = _countof(VideoDecodeStatusMap) - 1;
        return VideoDecodeStatusMap[status <= lastIndex ? status : lastIndex];
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Creates a single-entry video decode statistics query heap and a zero-filled readback buffer
    // with room for DXVA_MIN_STATUS_REPORTS resolved results. Throws on any failure.
    VideoDecodeStatistics::VideoDecodeStatistics(ImmediateContext* pDevice)
        : DeviceChild(pDevice)
        , m_ResultCount(DXVA_MIN_STATUS_REPORTS)
    {
        D3D12_QUERY_HEAP_DESC QueryHeapDesc = { D3D12_QUERY_HEAP_TYPE_VIDEO_DECODE_STATISTICS, 1, m_pParent->GetNodeMask() };
        ThrowFailure(m_pParent->m_pDevice12->CreateQueryHeap(
            &QueryHeapDesc,
            IID_PPV_ARGS(&m_spQueryHeap)
            )); // throw( _com_error )

        // Query data goes into a readback heap for CPU readback in GetData
        SIZE_T BufferSize = GetResultOffsetForIndex(m_ResultCount);
        m_ResultBuffer = m_pParent->AcquireSuballocatedHeap(AllocatorHeapType::Readback, BufferSize, ResourceAllocationContext::FreeThread); // throw( _com_error )

        m_StatisticsInfo.resize(m_ResultCount); // throw(bad_alloc )

        // Start from a fully zeroed result buffer; nothing is read back here, hence the empty read range.
        void* pMappedData = nullptr;
        CD3DX12_RANGE ReadRange(0, 0);
        ThrowFailure(m_ResultBuffer.Map(0, &ReadRange, &pMappedData));
        ZeroMemory(pMappedData, BufferSize);
        CD3DX12_RANGE WrittenRange(0, BufferSize);
        m_ResultBuffer.Unmap(0, &WrittenRange);
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Queues the query heap for deferred deletion and releases the readback suballocation against
    // the last video decode command list that used this object.
    VideoDecodeStatistics::~VideoDecodeStatistics()
    {
        AddToDeferredDeletionQueue(m_spQueryHeap);
        if (m_ResultBuffer.IsInitialized())
        {
            m_pParent->ReleaseSuballocatedHeap(AllocatorHeapType::Readback, m_ResultBuffer, m_LastUsedCommandListID[(UINT)COMMAND_LIST_TYPE::VIDEO_DECODE], COMMAND_LIST_TYPE::VIDEO_DECODE);
        }
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Ends the statistics query on the video decode command list, resolves it into the slot for
    // the current submission index, records the bookkeeping needed to build the status report
    // later, and advances the submission index (wrapping at m_ResultCount).
    void VideoDecodeStatistics::EndQuery(UINT StatusReportFeedbackNumber, const DXVA_PicEntry& CurrPic, UCHAR field_pic_flag) noexcept
    {
        auto pCommandList = m_pParent->GetVideoDecodeCommandList();

        pCommandList->EndQuery(
            m_spQueryHeap.get(),
            D3D12_QUERY_TYPE_VIDEO_DECODE_STATISTICS,
            0);

        SIZE_T offset = GetResultOffsetForIndex(m_SubmissionIndex);

        pCommandList->ResolveQueryData(
            m_spQueryHeap.get(),
            D3D12_QUERY_TYPE_VIDEO_DECODE_STATISTICS,
            0,
            1,
            m_ResultBuffer.GetResource(),
            offset + m_ResultBuffer.GetOffset()
            );

        StatisticsInfo& statisticsInfo = m_StatisticsInfo[m_SubmissionIndex];

        statisticsInfo.CompletedFenceId = m_pParent->GetCommandListID(COMMAND_LIST_TYPE::VIDEO_DECODE);
        statisticsInfo.StatusReportFeedbackNumber = StatusReportFeedbackNumber;
        statisticsInfo.CurrPic = CurrPic;
        statisticsInfo.field_pic_flag = field_pic_flag;

        UsedInCommandList(COMMAND_LIST_TYPE::VIDEO_DECODE, statisticsInfo.CompletedFenceId);

        m_SubmissionIndex++;
        if (m_SubmissionIndex == m_ResultCount)
        {
            m_SubmissionIndex = 0;
        }
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Size in bytes of one status report structure for the given profile type. Returns 0 for
    // MPEG2 (no structure is written for it) and throws E_UNEXPECTED for unknown profile types.
    SIZE_T VideoDecodeStatistics::GetStatStructSize(VIDEO_DECODE_PROFILE_TYPE profileType)
    {
        switch (profileType)
        {
            case VIDEO_DECODE_PROFILE_TYPE_H264:
            case VIDEO_DECODE_PROFILE_TYPE_H264_MVC:
                return sizeof(DXVA_Status_H264);

            case VIDEO_DECODE_PROFILE_TYPE_VP9:
            case VIDEO_DECODE_PROFILE_TYPE_VP8:
                return sizeof(DXVA_Status_VPx);

            case VIDEO_DECODE_PROFILE_TYPE_HEVC:
                return sizeof(DXVA_Status_HEVC);

            case VIDEO_DECODE_PROFILE_TYPE_VC1:
                return sizeof(DXVA_Status_VC1);

            case VIDEO_DECODE_PROFILE_TYPE_MPEG4PT2:
                return sizeof(DXVA_Status_VC1); // TODO: Srinath to confirm if MPEG4PT2 spec is really right mentioning to use this one

            case VIDEO_DECODE_PROFILE_TYPE_MPEG2: // TODO: no info on what to use for this one.
                return 0;

            default:
            {
                ThrowFailure(E_UNEXPECTED);
            }
        }

        // Unreachable
        return 0;
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Fills pData (DataSize bytes) with status reports for completed submissions, newest first.
    // The buffer is zeroed up front; each entry that is reported is marked consumed by setting its
    // CompletedFenceId to UINT64_MAX. Throws on mapping failure or an unknown profile type.
    void VideoDecodeStatistics::ReadAvailableData(ID3D12VideoDecoder* pVideoDecoder, VIDEO_DECODE_PROFILE_TYPE profileType, BYTE* pData, SIZE_T DataSize)
    {
        // From the H.264 DXVA spec.  HEVC and VPx specs have similar language.
        //
        // When the accelerator receives the Execute call for status reporting, it should not stall operation to wait
        // for any prior operations to complete. Instead, it should immediately provide the available status information
        // for all operations that have completed since the previous request for a status report, up to the maximum amount
        // requested. Immediately after the Execute call returns, the host decoder can read the status report information
        // from the buffer.
        //
        // The spec does not specify what to do when caller asks for status and there are no status reports available, or
        // when there are less status reports available than are requested.  A modified output size is not returned to apps by
        // runtimes and there doesn't appear to be an expected error code.  Documentation for StatusReportFeedbackNumber says
        // that "The value should not equal 0, and should be different in each call to Execute", so we zero the status reports
        // here including their StatusReportFeedbackNumber to indicate they do not have valid data.  This is consistent with some
        // drivers.
        //
        ZeroMemory(pData, DataSize);

        // Map the resolved query buffer.
        void* pMappedData = nullptr;
        CD3DX12_RANGE ReadRange(0, GetResultOffsetForIndex(m_ResultCount));
        ThrowFailure(m_ResultBuffer.Map(0, &ReadRange, &pMappedData));

        // Nothing is written through the mapping, so unmap with an empty written range.
        auto Range = CD3DX12_RANGE(0, 0);
        auto Unmap = MakeScopeExit([&]() { m_ResultBuffer.Unmap(0, &Range); });

        // Determine the fence ID of the last completed
        UINT64 lastCompletedFenceID = m_pParent->GetCompletedFenceValue(COMMAND_LIST_TYPE::VIDEO_DECODE);
        SIZE_T codecStructSize = GetStatStructSize(profileType);

        // Walk the ring backwards starting at the most recent submission, wrapping from 0 to m_ResultCount - 1.
        UINT16 statisticsInfoIndex = (m_SubmissionIndex == 0 ? m_ResultCount : m_SubmissionIndex) - 1;
        for (UINT16 i(0); i < m_ResultCount && DataSize >= codecStructSize; ++i, --statisticsInfoIndex)
        {
            StatisticsInfo& statisticsInfo = m_StatisticsInfo[statisticsInfoIndex];
            const D3D12_QUERY_DATA_VIDEO_DECODE_STATISTICS* pD3d12VideoStats = static_cast<const D3D12_QUERY_DATA_VIDEO_DECODE_STATISTICS*>(pMappedData) + statisticsInfoIndex;

            if (statisticsInfo.CompletedFenceId <= lastCompletedFenceID)
            {
                switch (profileType)
                {
                    case VIDEO_DECODE_PROFILE_TYPE_H264:
                    case VIDEO_DECODE_PROFILE_TYPE_H264_MVC:
                    {
                        assert(codecStructSize == sizeof(DXVA_Status_H264));
                        DXVA_Status_H264 *pStatus = reinterpret_cast<DXVA_Status_H264*>(pData);

                        pStatus->StatusReportFeedbackNumber = statisticsInfo.StatusReportFeedbackNumber;
                        pStatus->CurrPic.Index7Bits = statisticsInfo.CurrPic.Index7Bits;
                        pStatus->CurrPic.AssociatedFlag = statisticsInfo.CurrPic.AssociatedFlag;
                        pStatus->CurrPic.bPicEntry = statisticsInfo.CurrPic.bPicEntry;
                        pStatus->field_pic_flag = statisticsInfo.field_pic_flag;
                        pStatus->bStatus = MapDecodeStatus(pD3d12VideoStats->Status);
                        pStatus->wNumMbsAffected = (USHORT)pD3d12VideoStats->NumMacroblocksAffected;

                        if (g_hTracelogging)
                        {
                            TraceLoggingWrite(g_hTracelogging,
                                "GetStatus - StatusReportFeedbackNumber",
                                TraceLoggingPointer(pVideoDecoder, "pID3D12Decoder"),
                                TraceLoggingValue(pStatus->StatusReportFeedbackNumber, "statusReportFeedbackNumber"));
                        }
                    }
                    break;

                    case VIDEO_DECODE_PROFILE_TYPE_HEVC:
                    {
                        assert(codecStructSize == sizeof(DXVA_Status_HEVC));
                        DXVA_Status_HEVC *pStatus = reinterpret_cast<DXVA_Status_HEVC*>(pData);

                        pStatus->StatusReportFeedbackNumber = static_cast<USHORT>(statisticsInfo.StatusReportFeedbackNumber);
                        pStatus->CurrPic.Index7Bits = statisticsInfo.CurrPic.Index7Bits;
                        pStatus->CurrPic.AssociatedFlag = statisticsInfo.CurrPic.AssociatedFlag;
                        pStatus->CurrPic.bPicEntry = statisticsInfo.CurrPic.bPicEntry;
                        pStatus->bStatus = MapDecodeStatus(pD3d12VideoStats->Status);
                        pStatus->wNumMbsAffected = (USHORT)pD3d12VideoStats->NumMacroblocksAffected;

                        if (g_hTracelogging)
                        {
                            TraceLoggingWrite(g_hTracelogging,
                                "GetStatus - StatusReportFeedbackNumber",
                                TraceLoggingPointer(pVideoDecoder, "pID3D12Decoder"),
                                TraceLoggingValue(pStatus->StatusReportFeedbackNumber, "statusReportFeedbackNumber"));
                        }
                    }
                    break;

                    case VIDEO_DECODE_PROFILE_TYPE_VP9:
                    case VIDEO_DECODE_PROFILE_TYPE_VP8:
                    {
                        assert(codecStructSize == sizeof(DXVA_Status_VPx));
                        DXVA_Status_VPx *pStatus = reinterpret_cast<DXVA_Status_VPx*>(pData);

                        pStatus->StatusReportFeedbackNumber = statisticsInfo.StatusReportFeedbackNumber;
                        pStatus->CurrPic.Index7Bits = statisticsInfo.CurrPic.Index7Bits;
                        pStatus->CurrPic.AssociatedFlag = statisticsInfo.CurrPic.AssociatedFlag;
                        pStatus->CurrPic.bPicEntry = statisticsInfo.CurrPic.bPicEntry;
                        pStatus->bStatus = MapDecodeStatus(pD3d12VideoStats->Status);
                        pStatus->wNumMbsAffected = (USHORT)pD3d12VideoStats->NumMacroblocksAffected;

                        if (g_hTracelogging)
                        {
                            TraceLoggingWrite(g_hTracelogging,
                                "GetStatus - StatusReportFeedbackNumber",
                                TraceLoggingPointer(pVideoDecoder, "pID3D12Decoder"),
                                TraceLoggingValue(pStatus->StatusReportFeedbackNumber, "statusReportFeedbackNumber"));
                        }
                    }
                    break;

                    case VIDEO_DECODE_PROFILE_TYPE_VC1:
                    case VIDEO_DECODE_PROFILE_TYPE_MPEG4PT2:
                    {
                        assert(codecStructSize == sizeof(DXVA_Status_VC1));
                        DXVA_Status_VC1 *pStatus = reinterpret_cast<DXVA_Status_VC1*>(pData);

                        pStatus->StatusReportFeedbackNumber = static_cast<USHORT>(statisticsInfo.StatusReportFeedbackNumber);
                        pStatus->wDecodedPictureIndex = statisticsInfo.CurrPic.Index7Bits;
                        pStatus->bStatus = MapDecodeStatus(pD3d12VideoStats->Status);
                        pStatus->wNumMbsAffected = (USHORT)pD3d12VideoStats->NumMacroblocksAffected;

                        if (g_hTracelogging)
                        {
                            TraceLoggingWrite(g_hTracelogging,
                                "GetStatus - StatusReportFeedbackNumber",
                                TraceLoggingPointer(pVideoDecoder, "pID3D12Decoder"),
                                TraceLoggingValue(pStatus->StatusReportFeedbackNumber, "statusReportFeedbackNumber"));
                        }
                    }
                    break;

                    case VIDEO_DECODE_PROFILE_TYPE_MPEG2: // TODO: can't find info about this one, Srinath is checking.
                    {
                        // GetStatStructSize reports 0 for MPEG2, so nothing is written for it;
                        // the entry is still marked consumed below.
                        assert(codecStructSize == 0);
                        break;
                    }

                    default:
                    {
                        ThrowFailure(E_INVALIDARG);
                        break;
                    }
                }

                // Mark the entry as consumed so it is not reported again.
                statisticsInfo.CompletedFenceId = UINT64_MAX;
                DataSize -= codecStructSize;
                pData += codecStructSize;
            }

            // Wrap: the loop's --statisticsInfoIndex then lands on m_ResultCount - 1.
            if (statisticsInfoIndex == 0)
            {
                statisticsInfoIndex = m_ResultCount;
            }
        }
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Byte offset of the Index-th resolved query result within the result buffer.
    SIZE_T VideoDecodeStatistics::GetResultOffsetForIndex(UINT Index)
    {
        return sizeof(D3D12_QUERY_DATA_VIDEO_DECODE_STATISTICS) * Index;
    }

}; // namespace D3D12TranslationLayer

================================================
FILE: src/VideoDevice.cpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include "pch.h"

namespace D3D12TranslationLayer
{
    //----------------------------------------------------------------------------------------------------------------------------------
    // Acquires the D3D12 video device and caches, for every decode profile the translation layer can
    // actually decode, the profile GUID together with the list of output formats the driver reports.
    // Throws (via ThrowFailure) if the parent has no ID3D12Device1 or any feature query fails.
    void VideoDevice::Initialize()
    {
        if (!m_pParent->m_pDevice12_1)
        {
            ThrowFailure(E_NOINTERFACE);
        }

        m_pParent->InitializeVideo(&m_spVideoDevice);

        D3D12_FEATURE_DATA_VIDEO_DECODE_PROFILE_COUNT decodeProfileData = {};
        decodeProfileData.NodeIndex = m_pParent->GetNodeIndex();
        CheckFeatureSupport(D3D12_FEATURE_VIDEO_DECODE_PROFILE_COUNT, &decodeProfileData, sizeof(decodeProfileData));
        m_decodeProfiles.resize(decodeProfileData.ProfileCount); //throw( bad_alloc )

        // get profiles
        std::unique_ptr<GUID[]> spGUIDs;
        spGUIDs.reset(new GUID[decodeProfileData.ProfileCount]); //throw( bad_alloc )
        D3D12_FEATURE_DATA_VIDEO_DECODE_PROFILES profiles = {};
        profiles.NodeIndex = m_pParent->GetNodeIndex();
        profiles.ProfileCount = decodeProfileData.ProfileCount;
        profiles.pProfiles = spGUIDs.get();
        CheckFeatureSupport(D3D12_FEATURE_VIDEO_DECODE_PROFILES, &profiles, sizeof(profiles));

        // fill formats for each profile, filtering out profiles not supported by the translation layer
        UINT filteredCount = 0;
        for (UINT i = 0; i < decodeProfileData.ProfileCount; i++)
        {
            // Only expose profiles that the translation layer knows how to decode.
            // D3D12 may report profiles (e.g. AV1) that we don't have codec-specific
            // handling for in VideoDecode, which would cause failures at decode time.
            if (VideoDecode::GetProfileType(spGUIDs[i]) == VIDEO_DECODE_PROFILE_TYPE_NONE)
            {
                continue;
            }

            // Entries are compacted in place: filteredCount never exceeds i.
            m_decodeProfiles[filteredCount].profileGUID = spGUIDs[i];

            D3D12_VIDEO_DECODE_CONFIGURATION decodeConfig = {
                m_decodeProfiles[filteredCount].profileGUID,
                D3D12_BITSTREAM_ENCRYPTION_TYPE_NONE,
                D3D12_VIDEO_FRAME_CODED_INTERLACE_TYPE_NONE
            };

            // format count
            D3D12_FEATURE_DATA_VIDEO_DECODE_FORMAT_COUNT decodeProfileFormatData = {};
            decodeProfileFormatData.NodeIndex = m_pParent->GetNodeIndex();
            decodeProfileFormatData.Configuration = decodeConfig;
            CheckFeatureSupport(D3D12_FEATURE_VIDEO_DECODE_FORMAT_COUNT, &decodeProfileFormatData, sizeof(decodeProfileFormatData));

            // decoder formats
            D3D12_FEATURE_DATA_VIDEO_DECODE_FORMATS formats = {};
            formats.NodeIndex = m_pParent->GetNodeIndex();
            formats.Configuration = decodeConfig;
            formats.FormatCount = decodeProfileFormatData.FormatCount;
            m_decodeProfiles[filteredCount].formats.resize(formats.FormatCount); //throw( bad_alloc )
            formats.pOutputFormats = m_decodeProfiles[filteredCount].formats.data();
            CheckFeatureSupport(D3D12_FEATURE_VIDEO_DECODE_FORMATS, &formats, sizeof(formats));

            filteredCount++;
        }

        // Drop the unused tail left behind by filtered-out profiles.
        m_decodeProfiles.resize(filteredCount);
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    VideoDevice::~VideoDevice() noexcept
    {
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Returns the number of cached (supported) decode profiles. Throws E_POINTER on a null output pointer.
    _Use_decl_annotations_
    void VideoDevice::GetVideoDecoderProfileCount(UINT *ProfileCount)
    {
        if (!ProfileCount)
        {
            ThrowFailure(E_POINTER);
        }
        *ProfileCount = (UINT)m_decodeProfiles.size();
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Returns the GUID of the cached profile at Index. Throws E_INVALIDARG when Index is out of range.
    _Use_decl_annotations_
    void VideoDevice::GetVideoDecoderProfile(UINT Index, GUID *pDecodeProfile)
    {
        if (Index >= m_decodeProfiles.size())
        {
            ThrowFailure(E_INVALIDARG);
        }
        else
        {
            *pDecodeProfile = m_decodeProfiles[Index].profileGUID;
        }
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Returns how many output formats were cached for the given profile. Throws E_INVALIDARG for an unknown profile.
    _Use_decl_annotations_
    void VideoDevice::GetVideoDecoderFormatCount(const GUID *pDecodeProfile, UINT *pFormatCount)
    {
        for (size_t i = 0; i < m_decodeProfiles.size(); i++)
        {
            if (m_decodeProfiles[i].profileGUID == *pDecodeProfile)
            {
                *pFormatCount = (UINT)m_decodeProfiles[i].formats.size();
                return;
            }
        }
        ThrowFailure(E_INVALIDARG);
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Returns the Index-th cached output format of the given profile.
    // Throws E_INVALIDARG for an unknown profile or an Index beyond the profile's format list.
    _Use_decl_annotations_
    void VideoDevice::GetVideoDecoderFormat(const GUID *pDecodeProfile, UINT Index, DXGI_FORMAT *pFormat)
    {
        for (size_t i = 0; i < m_decodeProfiles.size(); i++)
        {
            if (m_decodeProfiles[i].profileGUID == *pDecodeProfile)
            {
                // Index comes from the caller; reject it instead of reading past the end of the vector.
                if (Index >= m_decodeProfiles[i].formats.size())
                {
                    ThrowFailure(E_INVALIDARG);
                }
                *pFormat = m_decodeProfiles[i].formats[Index];
                return;
            }
        }
        ThrowFailure(E_INVALIDARG);
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Sets *pSupported to TRUE only when the profile is known, the format is in its cached list, and the
    // format is fully typed. Unknown profiles yield FALSE rather than an error. Throws E_POINTER on null pSupported.
    _Use_decl_annotations_
    void VideoDevice::CheckVideoDecoderFormat(const GUID *pDecodeProfile, DXGI_FORMAT format, BOOL *pSupported)
    {
        if (!pSupported)
        {
            ThrowFailure(E_POINTER);
        }

        *pSupported = FALSE;
        for (size_t i = 0; i < m_decodeProfiles.size(); i++)
        {
            if (m_decodeProfiles[i].profileGUID == *pDecodeProfile)
            {
                for (size_t j = 0; j < m_decodeProfiles[i].formats.size(); j++)
                {
                    if (format == m_decodeProfiles[i].formats[j] &&
                        CD3D11FormatHelper::GetTypeLevel(format) == D3D11FTL_FULL_TYPE)
                    {
                        *pSupported = TRUE;
                        break;
                    }
                }
                // Only the first entry matching the profile is consulted.
                break;
            }
        }
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // True when DecodeProfile is one of the profiles cached by Initialize().
    bool VideoDevice::IsProfileSupported(REFGUID DecodeProfile) noexcept
    {
        for (size_t i = 0; i < m_decodeProfiles.size(); i++)
        {
            if (m_decodeProfiles[i].profileGUID == DecodeProfile)
            {
                return true;
            }
        }
        return false;
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Forwards to VideoDecode after validating the profile. Throws E_INVALIDARG for an unknown profile,
    // or whatever failure HRESULT VideoDecode reports.
    _Use_decl_annotations_
    void VideoDevice::GetVideoDecoderBufferTypeCount(const VIDEO_DECODE_DESC *pDesc, UINT *pBufferTypeCount)
    {
        if (!IsProfileSupported(pDesc->DecodeProfile))
        {
            ThrowFailure(E_INVALIDARG);
        }
        ThrowFailure(VideoDecode::GetVideoDecoderBufferTypeCount(pDesc, pBufferTypeCount));
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Forwards to VideoDecode after validating the profile. Throws E_INVALIDARG for an unknown profile.
    _Use_decl_annotations_
    void VideoDevice::GetVideoDecoderBufferInfo(const VIDEO_DECODE_DESC *pDesc, UINT Index, VIDEO_DECODE_BUFFER_TYPE *pType, UINT *pSize)
    {
        if (!IsProfileSupported(pDesc->DecodeProfile))
        {
            ThrowFailure(E_INVALIDARG);
        }
        VideoDecode::GetVideoDecoderBufferInfo(pDesc, Index, pType, pSize, m_pParent->IsXbox());
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Reports the number of decoder configurations for the profile; an unknown profile reports zero
    // configurations instead of failing.
    _Use_decl_annotations_
    void VideoDevice::GetVideoDecoderConfigCount(const VIDEO_DECODE_DESC *pDesc, UINT *pConfigCount)
    {
        if (IsProfileSupported(pDesc->DecodeProfile))
        {
            VideoDecode::GetVideoDecoderConfigCount(m_pParent->m_pDevice12.get(), m_pParent->GetNodeIndex(), pDesc, pConfigCount);
            return;
        }
        *pConfigCount = 0;
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Retrieves the Index-th decoder configuration for the profile. Throws E_INVALIDARG for an unknown profile.
    _Use_decl_annotations_
    void VideoDevice::GetVideoDecoderConfig(const VIDEO_DECODE_DESC *pDesc, UINT Index, VIDEO_DECODE_CONFIG *pConfig)
    {
        if (IsProfileSupported(pDesc->DecodeProfile))
        {
            VideoDecode::GetVideoDecoderConfig(m_pParent->m_pDevice12.get(), m_pParent->GetNodeIndex(), pDesc, Index, pConfig, m_pParent->IsXbox());
            return;
        }
        ThrowFailure(E_INVALIDARG);
    }

    // Some of the 'enum values' below are actually #defines, which generates a warning when used in a case statement.
#pragma warning(disable: 4063)

    //----------------------------------------------------------------------------------------------------------------------------------
    // Stamps this device's node index into the feature data for the decode-related queries the layer
    // supports, then forwards the query to the D3D12 video device. Any other feature throws E_NOTIMPL;
    // a failing driver query throws its HRESULT.
    _Use_decl_annotations_
    void VideoDevice::CheckFeatureSupport(D3D12_FEATURE_VIDEO FeatureVideo, void* pFeatureSupportData, UINT FeatureSupportDataSize)
    {
        switch (FeatureVideo)
        {
        case D3D12_FEATURE_VIDEO_DECODE_SUPPORT:
        case D3D12_FEATURE_VIDEO_DECODE_CONVERSION_SUPPORT:
        case D3D12_FEATURE_VIDEO_DECODE_PROFILE_COUNT:
        case D3D12_FEATURE_VIDEO_DECODE_PROFILES:
        case D3D12_FEATURE_VIDEO_DECODE_FORMAT_COUNT:
        case D3D12_FEATURE_VIDEO_DECODE_FORMATS:
        case D3D12_FEATURE_VIDEO_DECODE_HISTOGRAM:
            SetFeatureDataNodeIndex(pFeatureSupportData, FeatureSupportDataSize, m_pParent->GetNodeIndex());
            break;

        default:
            ThrowFailure(E_NOTIMPL);
            break;
        }

        ThrowFailure(m_spVideoDevice->CheckFeatureSupport(FeatureVideo, pFeatureSupportData, FeatureSupportDataSize));
    }
};

================================================
FILE: src/VideoProcess.cpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include "pch.h"
// NOTE(review): one angle-bracket #include followed here, but its header name was lost from this
// listing; restore it from version control.
#include "VideoProcessShaders.h"

namespace D3D12TranslationLayer
{
    void ColorConvertNormalized(_In_reads_(4) const FLOAT normInput[4], DXGI_COLOR_SPACE_TYPE inputColorSpace, _Out_writes_(4) FLOAT normOutput[4], DXGI_COLOR_SPACE_TYPE outputColorSpace);

    // Creates the underlying D3D12 video processor on the parent's node. Throws on creation failure.
    VideoProcessor::VideoProcessor(
        ImmediateContext *pContext,
        ID3D12VideoDevice* pVideoDeviceNoRef,
        const D3D12_VIDEO_PROCESS_OUTPUT_STREAM_DESC* pOutputStreamDesc,
        UINT NumInputStreamDescs,
        const D3D12_VIDEO_PROCESS_INPUT_STREAM_DESC *pInputStreamDescs
        ) : DeviceChildImpl(pContext)
    {
        ThrowFailure(pVideoDeviceNoRef->CreateVideoProcessor(m_pParent->GetNodeMask(), pOutputStreamDesc, NumInputStreamDescs, pInputStreamDescs, IID_PPV_ARGS(GetForCreate())));
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Obtains the video device interface and caches how many input streams the driver can process at once.
    void VideoProcess::Initialize()
    {
        if (!m_pParent->m_pDevice12_1)
        {
            ThrowFailure(E_NOINTERFACE);
        }
        ThrowFailure(m_pParent->m_pDevice12_1->QueryInterface(&m_spVideoDevice));

        D3D12_FEATURE_DATA_VIDEO_PROCESS_MAX_INPUT_STREAMS maxInputStreamsData = { m_pParent->GetNodeIndex() };
        ThrowFailure(m_spVideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_PROCESS_MAX_INPUT_STREAMS, &maxInputStreamsData, sizeof(maxInputStreamsData)));
        m_driverSupportedMaxInputStreams = maxInputStreamsData.MaxInputStreams;
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    VideoProcess::~VideoProcess() noexcept
    {
        // Stop exception here, as destructor is noexcept
        try
        {
            m_pParent->Flush(COMMAND_LIST_TYPE_VIDEO_PROCESS_MASK); //throws
        }
        catch (_com_error&)
        {
            // success = false;
        }
        catch (std::bad_alloc&)
        {
            // success = false;
        }
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Refreshes the cached per-stream creation descriptors from the incoming arguments.
    // 'updated' is set (never cleared) when the stream count or any descriptor differs from the cache;
    // once set, every subsequent stream's descriptor is copied into the cache.
    _Use_decl_annotations_
    void VideoProcess::UpdateInputDescriptor(VIDEO_PROCESS_INPUT_ARGUMENTS* pInputArguments, UINT NumInputStreams, VIDEO_PROCESS_OUTPUT_ARGUMENTS* pOutputArguments, bool &updated)
    {
        if (NumInputStreams != m_creationInputDesc.size())
        {
            updated = true;
            m_creationInputDesc.resize(NumInputStreams);
        }

        for (UINT i = 0; i < NumInputStreams; i++)
        {
            D3D12_VIDEO_PROCESS_INPUT_STREAM_DESC& inputDesc = pInputArguments->D3D12InputStreamDesc[i];

            // Must run before the comparison: it rewrites NumPastFrames/NumFutureFrames in inputDesc.
            UpdateNeededMaxPastFutureFrames(inputDesc, pOutputArguments->D3D12OutputStreamDesc);

            if (!updated && memcmp(&inputDesc, &m_creationInputDesc[i], sizeof(inputDesc)) != 0)
            {
                updated = true;
            }
            if (updated)
            {
                m_creationInputDesc[i] = inputDesc;
            }
        }
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Refreshes the cached output creation descriptor; sets 'updated' when it changed.
    _Use_decl_annotations_
    void VideoProcess::UpdateOutputDescriptor(VIDEO_PROCESS_OUTPUT_ARGUMENTS* pOutputArguments, bool &updated)
    {
        D3D12_VIDEO_PROCESS_OUTPUT_STREAM_DESC& outputDesc = pOutputArguments->D3D12OutputStreamDesc;
        if (memcmp(&outputDesc, &m_creationOutputDesc, sizeof(pOutputArguments->D3D12OutputStreamDesc)) != 0)
        {
            m_creationOutputDesc = outputDesc;
            updated = true;
        }
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // (Re)creates the D3D12 video processor when none exists yet or when any creation descriptor changed.
    _Use_decl_annotations_
    void VideoProcess::InitializeProcessor(VIDEO_PROCESS_INPUT_ARGUMENTS* pInputArguments, UINT NumInputStreams, VIDEO_PROCESS_OUTPUT_ARGUMENTS* pOutputArguments)
    {
        bool updated = false;
        UpdateInputDescriptor(pInputArguments, NumInputStreams, pOutputArguments, updated);
        UpdateOutputDescriptor(pOutputArguments, updated);

        if (!m_spVideoProcessor || updated)
        {
            // We need to create the video processor obeying the maximum number of input streams it supports
            NumInputStreams = NumInputStreams > m_driverSupportedMaxInputStreams ? m_driverSupportedMaxInputStreams : NumInputStreams;

            // Release the previous processor before constructing its replacement.
            m_spVideoProcessor.reset();
            m_spVideoProcessor = std::make_unique<VideoProcessor>(m_pParent, m_spVideoDevice.get(), &m_creationOutputDesc, NumInputStreams, m_creationInputDesc.data());
        }
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Asks the driver how many past/future reference frames the requested processing needs and writes
    // the answer into inputDesc.NumPastFrames / NumFutureFrames.
    _Use_decl_annotations_
    void VideoProcess::UpdateNeededMaxPastFutureFrames(D3D12_VIDEO_PROCESS_INPUT_STREAM_DESC &inputDesc, const D3D12_VIDEO_PROCESS_OUTPUT_STREAM_DESC &outputDesc)
    {
        D3D12_FEATURE_DATA_VIDEO_PROCESS_REFERENCE_INFO referenceInfo = {};
        referenceInfo.NodeIndex = m_pParent->GetNodeIndex();

        D3D12_VIDEO_PROCESS_FEATURE_FLAGS featureFlags = D3D12_VIDEO_PROCESS_FEATURE_FLAG_NONE;
        featureFlags |= outputDesc.AlphaFillMode ? D3D12_VIDEO_PROCESS_FEATURE_FLAG_ALPHA_FILL : D3D12_VIDEO_PROCESS_FEATURE_FLAG_NONE;
        featureFlags |= inputDesc.LumaKey.Enable ? D3D12_VIDEO_PROCESS_FEATURE_FLAG_LUMA_KEY : D3D12_VIDEO_PROCESS_FEATURE_FLAG_NONE;
        featureFlags |= (inputDesc.StereoFormat != D3D12_VIDEO_FRAME_STEREO_FORMAT_NONE || outputDesc.EnableStereo) ? D3D12_VIDEO_PROCESS_FEATURE_FLAG_STEREO : D3D12_VIDEO_PROCESS_FEATURE_FLAG_NONE;
        featureFlags |= inputDesc.EnableOrientation ? D3D12_VIDEO_PROCESS_FEATURE_FLAG_ROTATION | D3D12_VIDEO_PROCESS_FEATURE_FLAG_FLIP : D3D12_VIDEO_PROCESS_FEATURE_FLAG_NONE;
        featureFlags |= inputDesc.EnableAlphaBlending ? D3D12_VIDEO_PROCESS_FEATURE_FLAG_ALPHA_BLENDING : D3D12_VIDEO_PROCESS_FEATURE_FLAG_NONE;

        referenceInfo.DeinterlaceMode = inputDesc.DeinterlaceMode;
        referenceInfo.Filters = inputDesc.FilterFlags;
        referenceInfo.FeatureSupport = featureFlags;
        referenceInfo.InputFrameRate = inputDesc.FrameRate;
        referenceInfo.OutputFrameRate = outputDesc.FrameRate;
        referenceInfo.EnableAutoProcessing = inputDesc.EnableAutoProcessing;

        // Best effort: a failed query is not fatal. Make the fallback explicit (no reference frames)
        // rather than trusting whatever a failing driver may have left in the output fields.
        if (FAILED(m_spVideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_PROCESS_REFERENCE_INFO, &referenceInfo, sizeof(referenceInfo))))
        {
            referenceInfo.PastFrames = 0;
            referenceInfo.FutureFrames = 0;
        }

        inputDesc.NumPastFrames = referenceInfo.PastFrames;
        inputDesc.NumFutureFrames = referenceInfo.FutureFrames;
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Translates the arguments, runs the optional deinterlace prepass, records and submits ProcessFrames1
    // for as many streams as the driver supports, then blits any remaining streams.
    _Use_decl_annotations_
    void VideoProcess::ProcessFrames(VIDEO_PROCESS_INPUT_ARGUMENTS *pInputArguments, UINT NumInputStreams, VIDEO_PROCESS_OUTPUT_ARGUMENTS *pOutputArguments)
    {
        // translate input arguments
        for (DWORD stream = 0; stream < NumInputStreams; stream++)
        {
            DWORD nViews = pInputArguments->D3D12InputStreamDesc[stream].StereoFormat == D3D12_VIDEO_FRAME_STEREO_FORMAT_SEPARATE ? 2 : 1;
            for (DWORD view = 0; view < nViews; view++)
            {
                pInputArguments->PrepareResources(stream, view);
            }
            pInputArguments->PrepareStreamArguments(stream);
        }

        // Do VP blit for the number of supported streams by the driver
        UINT NumInputStreamsForVPBlit = min(NumInputStreams, m_driverSupportedMaxInputStreams);

        // translate output arguments
        pOutputArguments->PrepareResources();
        pOutputArguments->PrepareTransform();

        // pre-process resources using shaders if necessary
        m_Deinterlace.Process(pInputArguments, NumInputStreamsForVPBlit, pOutputArguments);

        InitializeProcessor(pInputArguments, NumInputStreamsForVPBlit, pOutputArguments);

        // transition resource states.
        for (DWORD stream = 0; stream < NumInputStreamsForVPBlit; stream++)
        {
            DWORD nViews = pInputArguments->D3D12InputStreamDesc[stream].StereoFormat == D3D12_VIDEO_FRAME_STEREO_FORMAT_SEPARATE ? 2 : 1;
            for (DWORD view = 0; view < nViews; view++)
            {
                pInputArguments->TransitionResources(m_pParent, stream, view);
            }
        }
        pOutputArguments->TransitionResources(m_pParent);

        m_pParent->GetResourceStateManager().ApplyAllResourceTransitions();

        // submit ProcessFrame
        m_pParent->GetVideoProcessCommandList()->ProcessFrames1(
            m_spVideoProcessor->GetForUse(COMMAND_LIST_TYPE::VIDEO_PROCESS),
            &pOutputArguments->D3D12OutputStreamArguments,
            NumInputStreamsForVPBlit,
            pInputArguments->D3D12InputStreamArguments.data());

        m_pParent->SubmitCommandList(COMMAND_LIST_TYPE::VIDEO_PROCESS); // throws

        // now Blit remaining streams (if needed)
        if (NumInputStreamsForVPBlit < NumInputStreams)
        {
            EmulateVPBlit(pInputArguments, NumInputStreams, pOutputArguments, NumInputStreamsForVPBlit);
        }
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Blits streams [StartStream, NumInputStreams) onto each output view with the blit helper.
    _Use_decl_annotations_
    void VideoProcess::EmulateVPBlit(VIDEO_PROCESS_INPUT_ARGUMENTS *pInputArguments, UINT NumInputStreams, VIDEO_PROCESS_OUTPUT_ARGUMENTS *pOutputArguments, UINT StartStream)
    {
        DWORD nDstViews = pOutputArguments->D3D12OutputStreamDesc.EnableStereo ? 2 : 1;
        for (DWORD dstView = 0; dstView < nDstViews; dstView++)
        {
            for (UINT stream = StartStream; stream < NumInputStreams; stream++)
            {
                D3D12_VIDEO_PROCESS_INPUT_STREAM_ARGUMENTS1& inputArgs = pInputArguments->D3D12InputStreamArguments[stream];
                VIDEO_PROCESS_STREAM_INFO& inputInfo = pInputArguments->StreamInfo[stream];
                DWORD nSrcViews = pInputArguments->D3D12InputStreamDesc[stream].StereoFormat == D3D12_VIDEO_FRAME_STEREO_FORMAT_SEPARATE ? 2 : 1;
                for (DWORD srcView = 0; srcView < nSrcViews; srcView++)
                {
                    m_pParent->m_BlitHelper.Blit(
                        inputInfo.ResourceSet[srcView].CurrentFrame.pResource,                                  // pSrc
                        inputInfo.ResourceSet[srcView].CurrentFrame.SubresourceSubset.MinSubresource(),         // SrcSubresourceIdx
                        inputArgs.Transform.SourceRectangle,                                                    // SrcRect
                        pOutputArguments->CurrentFrame[dstView].pResource,                                      // pDst
                        pOutputArguments->CurrentFrame[dstView].SubresourceSubset.MinSubresource(),             // DstSubresourceIdx
                        inputArgs.Transform.DestinationRectangle,                                               // DstRect
                        inputArgs.AlphaBlending.Enable                                                          // bEnableAlpha
                        );
                }
            }
        }
    }

    //
    // Reads from the VIDEO_PROCESS_INPUT_ARGUMENTS' ResourceSet, building D3D12_VIDEO_PROCESS_INPUT_ARGUMENTS InputStream data
    //
    void VIDEO_PROCESS_INPUT_ARGUMENTS::PrepareResources(_In_ UINT stream, _In_ UINT view)
    {
        VIDEO_PROCESS_STREAM_INFO *pStreamInfo = &StreamInfo[stream];
        D3D12_VIDEO_PROCESS_INPUT_STREAM *pD3D12InputStream = &D3D12InputStreamArguments[stream].InputStream[view];

        // Fill current frame into D3D12 input stream arguments
        Resource *pCurFrame = pStreamInfo->ResourceSet[view].CurrentFrame.pResource;
        UINT subresource = pStreamInfo->ResourceSet[view].CurrentFrame.SubresourceSubset.MinSubresource();
        pD3D12InputStream->pTexture2D = pCurFrame->GetUnderlyingResource();
        pD3D12InputStream->Subresource = subresource;

        // Fill past frames into D3D12 input stream arguments
        UINT NumPastFrames = (UINT)pStreamInfo->ResourceSet[view].PastFrames.size();
        pD3D12InputStream->ReferenceSet.NumPastFrames = NumPastFrames;
        if (NumPastFrames)
        {
            pStreamInfo->ResourceSet[view].PastSubresources.resize(NumPastFrames);
            pStreamInfo->ResourceSet[view].D3D12ResourcePastFrames.resize(NumPastFrames);
            pD3D12InputStream->ReferenceSet.ppPastFrames = pStreamInfo->ResourceSet[view].D3D12ResourcePastFrames.data();
            pD3D12InputStream->ReferenceSet.pPastSubresources = pStreamInfo->ResourceSet[view].PastSubresources.data();
            for (DWORD pastFrame = 0; pastFrame < NumPastFrames; pastFrame++)
            {
                Resource *pPastFrame = pStreamInfo->ResourceSet[view].PastFrames[pastFrame].pResource;
                subresource = pStreamInfo->ResourceSet[view].PastFrames[pastFrame].SubresourceSubset.MinSubresource();
                pD3D12InputStream->ReferenceSet.ppPastFrames[pastFrame] = pPastFrame->GetUnderlyingResource();
                pD3D12InputStream->ReferenceSet.pPastSubresources[pastFrame] = subresource;
            }
        }

        // Fill future frames into D3D12 input stream arguments
        UINT NumFutureFrames = (UINT)pStreamInfo->ResourceSet[view].FutureFrames.size();
        pD3D12InputStream->ReferenceSet.NumFutureFrames = NumFutureFrames;
        if (NumFutureFrames)
        {
            pStreamInfo->ResourceSet[view].FutureSubresources.resize(NumFutureFrames);
            pStreamInfo->ResourceSet[view].D3D12ResourceFutureFrames.resize(NumFutureFrames);
            pD3D12InputStream->ReferenceSet.ppFutureFrames = pStreamInfo->ResourceSet[view].D3D12ResourceFutureFrames.data();
            pD3D12InputStream->ReferenceSet.pFutureSubresources = pStreamInfo->ResourceSet[view].FutureSubresources.data();
            for (DWORD FutureFrame = 0; FutureFrame < NumFutureFrames; FutureFrame++)
            {
                Resource *pFutureFrame = pStreamInfo->ResourceSet[view].FutureFrames[FutureFrame].pResource;
                subresource = pStreamInfo->ResourceSet[view].FutureFrames[FutureFrame].SubresourceSubset.MinSubresource();
                pD3D12InputStream->ReferenceSet.ppFutureFrames[FutureFrame] = pFutureFrame->GetUnderlyingResource();
                pD3D12InputStream->ReferenceSet.pFutureSubresources[FutureFrame] = subresource;
            }
        }

        {
            const D3D12_RESOURCE_DESC &desc = D3D12InputStreamArguments[stream].InputStream[view].pTexture2D->GetDesc();
            D3D12_VIDEO_PROCESS_INPUT_STREAM_DESC& inputDesc = D3D12InputStreamDesc[stream];
            inputDesc.Format = desc.Format;

            // set default colorspace if not set, based on DXGI format
            if (!StreamInfo[stream].ColorSpaceSet)
            {
                inputDesc.ColorSpace = CDXGIColorSpaceHelper::ConvertFromLegacyColorSpace(!CD3D11FormatHelper::YUV(desc.Format), CD3D11FormatHelper::GetBitsPerUnit(desc.Format), /* StudioRGB= */ false, /* P709= */ true, /* StudioYUV= */ true);
            }
        }
    }

    // Fills the transform and rate info of the given stream's D3D12 arguments.
    void VIDEO_PROCESS_INPUT_ARGUMENTS::PrepareStreamArguments(_In_ UINT stream)
    {
        PrepareTransform(stream);

        // Update Rate Info
        D3D12InputStreamArguments[stream].RateInfo.OutputIndex = StreamInfo[stream].OutputIndex;
        D3D12InputStreamArguments[stream].RateInfo.InputFrameOrField = StreamInfo[stream].InputFrameOrField;
    }

    // Computes the stream's orientation and, where the application set no source/destination rectangle,
    // the default rectangles and size ranges derived from view 0's texture description.
    void VIDEO_PROCESS_INPUT_ARGUMENTS::PrepareTransform(_In_ UINT stream)
    {
        // compute final orientation of the input stream based on our saved OrientationInfo
        D3D12_VIDEO_PROCESS_ORIENTATION orientation = FinalOrientation(StreamInfo[stream].OrientationInfo.Rotation, StreamInfo[stream].OrientationInfo.FlipHorizontal, StreamInfo[stream].OrientationInfo.FlipVertical);
        D3D12InputStreamArguments[stream].Transform.Orientation = orientation;
        D3D12InputStreamDesc[stream].EnableOrientation = (orientation == D3D12_VIDEO_PROCESS_ORIENTATION_DEFAULT) ? FALSE : TRUE;

        D3D12_RESOURCE_DESC desc = D3D12InputStreamArguments[stream].InputStream[0].pTexture2D->GetDesc();
        if (!StreamInfo[stream].EnableSourceRect)
        {
            // if source rectangle is not set, entire input surface according to DX11 spec
            D3D12InputStreamArguments[stream].Transform.SourceRectangle = CD3DX12_RECT(0, 0, (LONG)desc.Width, (LONG)desc.Height);
            D3D12_VIDEO_SIZE_RANGE& SourceSizeRange = D3D12InputStreamDesc[stream].SourceSizeRange;
            SourceSizeRange.MaxWidth = static_cast<UINT>(desc.Width);
            SourceSizeRange.MinWidth = static_cast<UINT>(desc.Width);
            SourceSizeRange.MaxHeight = desc.Height;
            SourceSizeRange.MinHeight = desc.Height;
        }
        if (!StreamInfo[stream].EnableDestinationRect)
        {
            // if dest rectangle is not set, no data is written from this stream according to DX11 spec
            D3D12InputStreamArguments[stream].Transform.DestinationRectangle = CD3DX12_RECT(0, 0, 0, 0);
            D3D12_VIDEO_SIZE_RANGE& DestinationSizeRange = D3D12InputStreamDesc[stream].DestinationSizeRange;
            DestinationSizeRange.MaxWidth = static_cast<UINT>(desc.Width);
            DestinationSizeRange.MinWidth = static_cast<UINT>(desc.Width);
            DestinationSizeRange.MaxHeight = desc.Height;
            DestinationSizeRange.MinHeight = desc.Height;
        }
    }

    // Combines a rotation with the requested flips into a single D3D12 orientation value.
    D3D12_VIDEO_PROCESS_ORIENTATION VIDEO_PROCESS_INPUT_ARGUMENTS::FinalOrientation(_In_ D3D12_VIDEO_PROCESS_ORIENTATION Rotation, _In_ BOOL FlipHorizontal, _In_ BOOL FlipVertical)
    {
        D3D12_VIDEO_PROCESS_ORIENTATION Operation = D3D12_VIDEO_PROCESS_ORIENTATION_DEFAULT;

        //
        // D3D11 specifies rotation followed by flips
        //
        if (FlipHorizontal && FlipVertical)
        {
            Operation = D3D12_VIDEO_PROCESS_ORIENTATION_CLOCKWISE_180;
        }
        else if (FlipVertical)
        {
            Operation = D3D12_VIDEO_PROCESS_ORIENTATION_FLIP_VERTICAL;
        }
        else if (FlipHorizontal)
        {
            Operation = D3D12_VIDEO_PROCESS_ORIENTATION_FLIP_HORIZONTAL;
        }

        // The sum below wraps modulo the number of orientation values (0..7).
        static_assert(D3D12_VIDEO_PROCESS_ORIENTATION_CLOCKWISE_270_FLIP_HORIZONTAL == 7);
        constexpr UINT NumOrientations = 8;
        return (D3D12_VIDEO_PROCESS_ORIENTATION)(((UINT)Rotation + (UINT)Operation) % NumOrientations);
    }

    //
    // Transitions the resources referenced by VIDEO_PROCESS_INPUT_ARGUMENTS to D3D12_RESOURCE_STATE_VIDEO_PROCESS_READ.
    //
    void VIDEO_PROCESS_INPUT_ARGUMENTS::TransitionResources(_In_ ImmediateContext *pParent, _In_ UINT stream, _In_ UINT view)
    {
        VIDEO_PROCESS_STREAM_INFO *pStreamInfo = &StreamInfo[stream];

        // current frame
        Resource *pCurFrame = pStreamInfo->ResourceSet[view].CurrentFrame.pResource;
        pParent->GetResourceStateManager().TransitionSubresources(pCurFrame, pStreamInfo->ResourceSet[view].CurrentFrame.SubresourceSubset, D3D12_RESOURCE_STATE_VIDEO_PROCESS_READ, COMMAND_LIST_TYPE::VIDEO_PROCESS);

        // past frames
        UINT NumPastFrames = (UINT)pStreamInfo->ResourceSet[view].PastFrames.size();
        for (DWORD pastFrame = 0; pastFrame < NumPastFrames; pastFrame++)
        {
            Resource *pPastFrame = pStreamInfo->ResourceSet[view].PastFrames[pastFrame].pResource;
            pParent->GetResourceStateManager().TransitionSubresources(pPastFrame, pStreamInfo->ResourceSet[view].PastFrames[pastFrame].SubresourceSubset, D3D12_RESOURCE_STATE_VIDEO_PROCESS_READ, COMMAND_LIST_TYPE::VIDEO_PROCESS);
        }

        // future frames
        UINT NumFutureFrames = (UINT)pStreamInfo->ResourceSet[view].FutureFrames.size();
        for (DWORD FutureFrame = 0; FutureFrame < NumFutureFrames; FutureFrame++)
        {
            Resource *pFutureFrame = pStreamInfo->ResourceSet[view].FutureFrames[FutureFrame].pResource;
            pParent->GetResourceStateManager().TransitionSubresources(pFutureFrame, pStreamInfo->ResourceSet[view].FutureFrames[FutureFrame].SubresourceSubset, D3D12_RESOURCE_STATE_VIDEO_PROCESS_READ, COMMAND_LIST_TYPE::VIDEO_PROCESS);
        }
    }

    //
    // Reads from the VIDEO_PROCESS_OUTPUT_ARGUMENTS' CurrentFrame data, building D3D12_VIDEO_PROCESS_OUTPUT_ARGUMENTS OutputStream data
    //
    void VIDEO_PROCESS_OUTPUT_ARGUMENTS::PrepareResources()
    {
        DWORD nViews = D3D12OutputStreamDesc.EnableStereo ? 2 : 1;
        for (DWORD view = 0; view < nViews; view++)
        {
            Resource *pCurrentFrame = CurrentFrame[view].pResource;
            D3D12OutputStreamArguments.OutputStream[view].pTexture2D = pCurrentFrame->GetUnderlyingResource();
            D3D12OutputStreamArguments.OutputStream[view].Subresource = CurrentFrame[view].SubresourceSubset.MinSubresource();
        }

        const D3D12_RESOURCE_DESC &desc = D3D12OutputStreamArguments.OutputStream[0].pTexture2D->GetDesc();
        D3D12_VIDEO_PROCESS_OUTPUT_STREAM_DESC& outputDesc = D3D12OutputStreamDesc;
        outputDesc.Format = desc.Format;
        if (!ColorSpaceSet)
        {
            outputDesc.ColorSpace = CDXGIColorSpaceHelper::ConvertFromLegacyColorSpace(!CD3D11FormatHelper::YUV(desc.Format), CD3D11FormatHelper::GetBitsPerUnit(desc.Format), /* StudioRGB= */ false, /* P709= */ true, /* StudioYUV= */ true);
        }

        bool isOutputYCbCr = !CDXGIColorSpaceHelper::IsRGBColorSpace(outputDesc.ColorSpace);
        if (BackgroundColorSet && BackgroundColorYCbCr != isOutputYCbCr)
        {
            // we will need to convert the background color to the dest colorspace, since they are not matching.
            FLOAT converted[4] = {};
            if (BackgroundColorYCbCr)
            {
                // Note that we do not have any info about the color space of the background color. Assume studio 709.
                ColorConvertNormalized(outputDesc.BackgroundColor, DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P709, converted, outputDesc.ColorSpace);
            }
            else
            {
                // Note that we do not have any info about the color space of the background color. Assume full 709.
                ColorConvertNormalized(outputDesc.BackgroundColor, DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709, converted, outputDesc.ColorSpace);
            }
            memcpy(outputDesc.BackgroundColor, converted, sizeof(outputDesc.BackgroundColor));
        }

        // we have now updated our D3D12 outputdesc background color if needed
        BackgroundColorSet = false;
    }

    // Defaults the target rectangle to the whole output surface when the application set none.
    void VIDEO_PROCESS_OUTPUT_ARGUMENTS::PrepareTransform()
    {
        D3D12_RESOURCE_DESC desc = D3D12OutputStreamArguments.OutputStream[0].pTexture2D->GetDesc();
        if (!EnableTargetRect)
        {
            // if target rectangle is not set, entire output surface according to DX11 spec
            D3D12OutputStreamArguments.TargetRectangle = CD3DX12_RECT(0, 0, (LONG)desc.Width, (LONG)desc.Height);
        }
    }

    //
    // Transitions the resources referenced by VIDEO_PROCESS_OUTPUT_ARGUMENTS to D3D12_RESOURCE_STATE_VIDEO_PROCESS_WRITE.
    //
    void VIDEO_PROCESS_OUTPUT_ARGUMENTS::TransitionResources(_In_ ImmediateContext *pParent)
    {
        DWORD nViews = D3D12OutputStreamDesc.EnableStereo ? 2 : 1;
        for (DWORD view = 0; view < nViews; view++)
        {
            Resource *pCurrentFrame = CurrentFrame[view].pResource;

            // video process barrier for the output buffer
            pParent->GetResourceStateManager().TransitionSubresources(pCurrentFrame, CurrentFrame[view].SubresourceSubset, D3D12_RESOURCE_STATE_VIDEO_PROCESS_WRITE, COMMAND_LIST_TYPE::VIDEO_PROCESS);
        }
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // For every interlaced stream the driver cannot bob-deinterlace itself, deinterlaces the current
    // frame into an intermediate texture with a shader pass (NV12 input with matching integer frame
    // rates only) and rewrites the stream's arguments so that it appears progressive to the driver.
    void DeinterlacePrepass::Process(_Inout_ VIDEO_PROCESS_INPUT_ARGUMENTS *pInputArguments, UINT NumInputStreams, _In_ VIDEO_PROCESS_OUTPUT_ARGUMENTS *pOutputArguments)
    {
        for (UINT stream = 0; stream < NumInputStreams; ++stream)
        {
            DWORD nViews = pInputArguments->D3D12InputStreamDesc[stream].StereoFormat == D3D12_VIDEO_FRAME_STEREO_FORMAT_SEPARATE ? 2 : 1;
            bool bIsInterlaced = false;
            bool bNeedToDeinterlace = false;
            bool bCanDoInterlace = false;

            if (pInputArguments->D3D12InputStreamArguments[stream].FieldType != D3D12_VIDEO_FIELD_TYPE_NONE)
            {
                // Content is interlaced
                bIsInterlaced = true;

                // Check if the underlying hardware can do the deinterlace.
                // TODO: If there's another translation layer op here, how to determine the right input parameters to pass?
                auto pInputResource = pInputArguments->StreamInfo[stream].ResourceSet[0].CurrentFrame.pResource;
                auto& InputResourceDesc = pInputResource->Parent()->m_desc12;
                D3D12_FEATURE_DATA_VIDEO_PROCESS_SUPPORT VPSupport =
                {
                    m_pParent->GetNodeIndex(),
                    { (UINT)InputResourceDesc.Width, InputResourceDesc.Height, { pInputArguments->D3D12InputStreamDesc[stream].Format, pInputArguments->D3D12InputStreamDesc[stream].ColorSpace } },
                    pInputArguments->D3D12InputStreamArguments[stream].FieldType,
                    pInputArguments->D3D12InputStreamDesc[stream].StereoFormat,
                    pInputArguments->D3D12InputStreamDesc[stream].FrameRate,
                    { pOutputArguments->D3D12OutputStreamDesc.Format, pOutputArguments->D3D12OutputStreamDesc.ColorSpace },
                    pOutputArguments->D3D12OutputStreamDesc.EnableStereo ? D3D12_VIDEO_FRAME_STEREO_FORMAT_SEPARATE : D3D12_VIDEO_FRAME_STEREO_FORMAT_NONE,
                    pOutputArguments->D3D12OutputStreamDesc.FrameRate,
                };

                // A failed QueryInterface leaves the smart pointer null; treat that exactly like a failed
                // support query (driver cannot deinterlace) instead of dereferencing null.
                CComQIPtr<ID3D12VideoDevice> spVideoDevice = m_pParent->m_pDevice12.get();
                bNeedToDeinterlace =
                    !spVideoDevice ||
                    FAILED(spVideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_PROCESS_SUPPORT, &VPSupport, sizeof(VPSupport))) ||
                    (VPSupport.DeinterlaceSupport & D3D12_VIDEO_PROCESS_DEINTERLACE_FLAG_BOB) == D3D12_VIDEO_PROCESS_DEINTERLACE_FLAG_NONE;

                // If we can't do it, we'll still make the stream appear progressive.
                // Frame rates are compared after integer division; a zero denominator is treated as
                // "rates do not match" rather than dividing by zero.
                const DXGI_RATIONAL& inputRate = pInputArguments->D3D12InputStreamDesc[stream].FrameRate;
                const DXGI_RATIONAL& outputRate = pOutputArguments->D3D12OutputStreamDesc.FrameRate;
                const bool bFrameRatesMatch =
                    inputRate.Denominator != 0 &&
                    outputRate.Denominator != 0 &&
                    (inputRate.Numerator / inputRate.Denominator == outputRate.Numerator / outputRate.Denominator);
                bCanDoInterlace = InputResourceDesc.Format == DXGI_FORMAT_NV12 && bFrameRatesMatch;
            }

            for (DWORD view = 0; view < nViews; view++)
            {
                if (bNeedToDeinterlace && bCanDoInterlace)
                {
                    // Create an intermediate matching the input.
                    VideoProcessView& InputView = pInputArguments->StreamInfo[stream].ResourceSet[view].CurrentFrame;
                    Resource* pInputResource = InputView.pResource;
                    ResourceCreationArgs IntermediateArgs = *pInputResource->Parent();
                    CViewSubresourceSubset SrcSubresources = InputView.SubresourceSubset;
                    assert(InputView.SubresourceSubset.NumExtendedSubresources() == 2);

                    IntermediateArgs.m_desc12.DepthOrArraySize = 1;
                    IntermediateArgs.m_desc12.MipLevels = 1;
                    IntermediateArgs.m_appDesc.m_ArraySize = 1;
                    IntermediateArgs.m_appDesc.m_MipLevels = 1;
                    IntermediateArgs.m_appDesc.m_SubresourcesPerPlane = 1;
                    IntermediateArgs.m_appDesc.m_Subresources = 2;

                    if (m_spIntermediates.size() <= stream)
                    {
                        m_spIntermediates.resize(NumInputStreams); // throw( bad_alloc )
                    }

                    // If we already have an existing intermediate, check if it's compatible
                    auto& spIntermediate = m_spIntermediates[stream][view];
                    if (spIntermediate)
                    {
                        ResourceCreationArgs const& ExistingResourceArgs = *spIntermediate->Parent();
                        assert(ExistingResourceArgs.m_desc12.Format == DXGI_FORMAT_NV12);
                        if (ExistingResourceArgs.m_desc12.Width != IntermediateArgs.m_desc12.Width ||
                            ExistingResourceArgs.m_desc12.Height != IntermediateArgs.m_desc12.Height)
                        {
                            spIntermediate.reset();
                        }
                    }

                    if (!spIntermediate)
                    {
                        IntermediateArgs.m_desc12.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
                        spIntermediate = Resource::CreateResource(m_pParent, IntermediateArgs, ResourceAllocationContext::ImmediateContextThreadLongLived); // throw( bad_alloc, _com_error )
                    }

                    // Replace input stream resource with intermediate
                    InputView.pResource = spIntermediate.get();
                    InputView.SubresourceSubset = CViewSubresourceSubset(CSubresourceSubset(1, 1, 2), 1, 1, 2);

                    // Strip past and future frames
                    pInputArguments->StreamInfo[stream].ResourceSet[view].PastFrames.clear();
                    pInputArguments->StreamInfo[stream].ResourceSet[view].PastSubresources.clear();
                    pInputArguments->StreamInfo[stream].ResourceSet[view].FutureFrames.clear();
                    pInputArguments->StreamInfo[stream].ResourceSet[view].FutureSubresources.clear();
                    pInputArguments->StreamInfo[stream].ResourceSet[view].D3D12ResourcePastFrames.clear();
                    pInputArguments->StreamInfo[stream].ResourceSet[view].D3D12ResourceFutureFrames.clear();

                    // Rebuild the D3D12 stream arguments so they point at the intermediate.
                    pInputArguments->PrepareResources(stream, view);

                    CreatePipelines(DXGI_FORMAT_R8_UINT);
                    CreatePipelines(DXGI_FORMAT_R8G8_UINT);

                    // Issue draws
                    // FieldType and OutputIndex are read here before they are rewritten below.
                    bool bTopFrame = ((pInputArguments->D3D12InputStreamArguments[stream].FieldType == D3D12_VIDEO_FIELD_TYPE_INTERLACED_TOP_FIELD_FIRST ? 0 : 1) +
                                      pInputArguments->D3D12InputStreamArguments[stream].RateInfo.OutputIndex) % 2 == 0;
                    DoDeinterlace(pInputResource, SrcSubresources, spIntermediate.get(), bTopFrame);
                }
                else if (m_spIntermediates.size() > stream && m_spIntermediates[stream][view])
                {
                    // The intermediate is no longer needed for this stream/view; release it.
                    m_spIntermediates[stream][view].reset();
                }
            }

            if (bNeedToDeinterlace)
            {
                // Make the stream appear not interlaced anymore
                pInputArguments->D3D12InputStreamArguments[stream].FieldType = D3D12_VIDEO_FIELD_TYPE_NONE;
                pInputArguments->D3D12InputStreamDesc[stream].DeinterlaceMode = D3D12_VIDEO_PROCESS_DEINTERLACE_FLAG_NONE;
                pInputArguments->D3D12InputStreamArguments[stream].RateInfo.OutputIndex /= 2;
            }
            else if (bIsInterlaced)
            {
                pInputArguments->D3D12InputStreamDesc[stream].DeinterlaceMode = m_DeinterlaceMode;
            }
        }
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    void DeinterlacePrepass::CreatePipelines(DXGI_FORMAT RTVFormat)
    {
        if (!m_spRootSig)
        {
            m_spRootSig.reset(new InternalRootSignature(m_pParent)); // throw( bad_alloc )
            m_spRootSig->Create(g_DeinterlacePS, sizeof(g_DeinterlacePS)); // throw( _com_error )
        }

        auto& spPSO = m_spDeinterlacePSOs[RTVFormat];
        if (!spPSO)
        {
            spPSO.reset(new VideoProcessPipelineState(m_pParent));

            struct VPPSOStream
            {
                CD3DX12_PIPELINE_STATE_STREAM_VS VS{CD3DX12_SHADER_BYTECODE(g_DeinterlaceVS, sizeof(g_DeinterlaceVS))};
                CD3DX12_PIPELINE_STATE_STREAM_PS PS{CD3DX12_SHADER_BYTECODE(g_DeinterlacePS, sizeof(g_DeinterlacePS))};
                CD3DX12_PIPELINE_STATE_STREAM_PRIMITIVE_TOPOLOGY PrimitiveTopology{D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE};
                CD3DX12_PIPELINE_STATE_STREAM_NODE_MASK NodeMask;
                CD3DX12_PIPELINE_STATE_STREAM_RENDER_TARGET_FORMATS RTVFormats;
                CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL DSS;
                CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_DESC Samples{DXGI_SAMPLE_DESC{1, 0}};
                CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_MASK SampleMask{UINT_MAX};
            } PSODesc;
            PSODesc.NodeMask = m_pParent->GetNodeMask();
            PSODesc.RTVFormats =
D3D12_RT_FORMAT_ARRAY{ {RTVFormat}, 1 }; CD3DX12_DEPTH_STENCIL_DESC DSS(CD3DX12_DEFAULT{}); DSS.DepthEnable = false; PSODesc.DSS = DSS; D3D12_PIPELINE_STATE_STREAM_DESC StreamDesc = { sizeof(PSODesc), &PSODesc }; ThrowFailure(m_pParent->m_pDevice12_2->CreatePipelineState(&StreamDesc, IID_PPV_ARGS(spPSO->GetForCreate()))); } } //---------------------------------------------------------------------------------------------------------------------------------- void DeinterlacePrepass::DoDeinterlace(Resource* pSrc, CViewSubresourceSubset SrcSubset, Resource* pDst, bool bTopFrame) { m_pParent->PreRender(COMMAND_LIST_TYPE::GRAPHICS); m_pParent->GetResourceStateManager().TransitionSubresources(pSrc, SrcSubset, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, COMMAND_LIST_TYPE::GRAPHICS); m_pParent->GetResourceStateManager().TransitionResource(pDst, D3D12_RESOURCE_STATE_RENDER_TARGET, COMMAND_LIST_TYPE::GRAPHICS); m_pParent->GetResourceStateManager().ApplyAllResourceTransitions(); // According to documentation, VPBlt doesn't respect predication ImmediateContext::CDisablePredication DisablePredication(m_pParent); UINT SRVBaseSlot = m_pParent->ReserveSlots(m_pParent->m_ViewHeap, SrcSubset.NumExtendedSubresources()); ID3D12GraphicsCommandList* pCommandList = m_pParent->GetGraphicsCommandList(); pCommandList->SetGraphicsRootSignature(m_spRootSig->GetRootSignature()); pCommandList->SetGraphicsRoot32BitConstant(0, bTopFrame ? 
1 : 0, 0); pCommandList->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); { // Unbind all VBs D3D12_VERTEX_BUFFER_VIEW VBVArray[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT]; memset(VBVArray, 0, sizeof(VBVArray)); pCommandList->IASetVertexBuffers(0, D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT, VBVArray); } for (auto&& Range : SrcSubset) { for (UINT subresource = Range.first; subresource < Range.second; ++subresource) { UINT8 SrcPlane = 0, SrcMip = 0; UINT16 SrcArraySlice = 0; D3D12DecomposeSubresource(subresource, pSrc->AppDesc()->MipLevels(), pSrc->AppDesc()->ArraySize(), SrcMip, SrcArraySlice, SrcPlane); auto& SrcFootprint = pSrc->GetSubresourcePlacement(subresource).Footprint; DXGI_FORMAT ViewFormat = SrcFootprint.Format; switch (ViewFormat) { case DXGI_FORMAT_R8_TYPELESS: ViewFormat = DXGI_FORMAT_R8_UINT; break; case DXGI_FORMAT_R8G8_TYPELESS: ViewFormat = DXGI_FORMAT_R8G8_UINT; break; case DXGI_FORMAT_R16_TYPELESS: ViewFormat = DXGI_FORMAT_R16_UINT; break; case DXGI_FORMAT_R16G16_TYPELESS: ViewFormat = DXGI_FORMAT_R16G16_UINT; break; } D3D12_SHADER_RESOURCE_VIEW_DESC SRVDesc = {}; SRVDesc.Format = ViewFormat; SRVDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; SRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; SRVDesc.Texture2DArray.MipLevels = 1; SRVDesc.Texture2DArray.MostDetailedMip = SrcMip; SRVDesc.Texture2DArray.PlaneSlice = SrcPlane; SRVDesc.Texture2DArray.ArraySize = 1; SRVDesc.Texture2DArray.FirstArraySlice = SrcArraySlice; SRVDesc.Texture2DArray.ResourceMinLODClamp = 0.0f; SRV inputSRV(m_pParent, SRVDesc, *pSrc); #if DBG UINT DstSubresource = ComposeSubresourceIdxExtended(0, 0, SrcPlane, 1, 1); auto& DstFootprint = pDst->GetSubresourcePlacement(DstSubresource).Footprint; assert(DstFootprint.Format == SrcFootprint.Format && DstFootprint.Width == SrcFootprint.Width && DstFootprint.Height == SrcFootprint.Height && DstSubresource == SrcPlane); #endif D3D12_RENDER_TARGET_VIEW_DESC RTVDesc = {}; RTVDesc.Format = 
ViewFormat; RTVDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; RTVDesc.Texture2D.MipSlice = 0; RTVDesc.Texture2D.PlaneSlice = SrcPlane; RTV outputRTV(m_pParent, RTVDesc, *pDst); D3D12_CPU_DESCRIPTOR_HANDLE SRVBaseCPU = m_pParent->m_ViewHeap.CPUHandle(SRVBaseSlot); D3D12_GPU_DESCRIPTOR_HANDLE SRVBaseGPU = m_pParent->m_ViewHeap.GPUHandle(SRVBaseSlot); SRVBaseSlot++; m_pParent->m_pDevice12->CopyDescriptorsSimple(1, SRVBaseCPU, inputSRV.GetRefreshedDescriptorHandle(), D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); pCommandList->SetPipelineState(m_spDeinterlacePSOs[ViewFormat]->GetForUse(COMMAND_LIST_TYPE::GRAPHICS)); CD3DX12_VIEWPORT Viewport(0.f, 0.f, (FLOAT)SrcFootprint.Width, (FLOAT)SrcFootprint.Height); CD3DX12_RECT Scissor(0, 0, SrcFootprint.Width, SrcFootprint.Height); pCommandList->RSSetViewports(1, &Viewport); pCommandList->RSSetScissorRects(1, &Scissor); auto Descriptor = outputRTV.GetRefreshedDescriptorHandle(); pCommandList->OMSetRenderTargets(1, &Descriptor, TRUE, nullptr); pCommandList->SetGraphicsRootDescriptorTable(1, SRVBaseGPU); pCommandList->DrawInstanced(4, 1, 0, 0); } } m_pParent->PostRender(COMMAND_LIST_TYPE::GRAPHICS, e_GraphicsStateDirty); } }; ================================================ FILE: src/VideoProcessEnum.cpp ================================================ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
#include "pch.h"
// NOTE(review): the two directives below lost their header names in this copy of the file;
// restore them from the upstream source.
#include
#include

namespace D3D12TranslationLayer
{
//----------------------------------------------------------------------------------------------------------------------------------
// Acquires the video device interface from the parent's m_pDevice12_1.
// Throws E_NOINTERFACE when m_pDevice12_1 is null, or the QueryInterface failure.
void VideoProcessEnum::Initialize()
{
    if (!m_pParent->m_pDevice12_1)
    {
        ThrowFailure(E_NOINTERFACE);
    }
    ThrowFailure(m_pParent->m_pDevice12_1->QueryInterface(&m_spVideoDevice));
}

//----------------------------------------------------------------------------------------------------------------------------------
// Forwards a video-process feature query to the video device after stamping the node index into
// the caller's structure. Only the three VIDEO_PROCESS_* features are accepted; anything else
// throws E_NOTIMPL. For MAX_INPUT_STREAMS the reported count is raised to at least
// MIN_SUPPORTED_INPUT_STREAMS_VIA_EMULATION, and a mismatched structure size throws E_INVALIDARG.
//
// NOTE(review): the template arguments on SetFeatureDataNodeIndex were lost in this copy of the
// file and are restored by inference from the case labels - confirm against the helper's declaration.
_Use_decl_annotations_
void VideoProcessEnum::CheckFeatureSupport(D3D12_FEATURE_VIDEO FeatureVideo, void* pFeatureSupportData, UINT FeatureSupportDataSize)
{
    switch (FeatureVideo)
    {
    case D3D12_FEATURE_VIDEO_PROCESS_SUPPORT:
    {
        SetFeatureDataNodeIndex<D3D12_FEATURE_DATA_VIDEO_PROCESS_SUPPORT>(pFeatureSupportData, FeatureSupportDataSize, m_pParent->GetNodeIndex());
        break;
    }
    case D3D12_FEATURE_VIDEO_PROCESS_MAX_INPUT_STREAMS:
    {
        SetFeatureDataNodeIndex<D3D12_FEATURE_DATA_VIDEO_PROCESS_MAX_INPUT_STREAMS>(pFeatureSupportData, FeatureSupportDataSize, m_pParent->GetNodeIndex());
        break;
    }
    case D3D12_FEATURE_VIDEO_PROCESS_REFERENCE_INFO:
    {
        SetFeatureDataNodeIndex<D3D12_FEATURE_DATA_VIDEO_PROCESS_REFERENCE_INFO>(pFeatureSupportData, FeatureSupportDataSize, m_pParent->GetNodeIndex());
        break;
    }
    default:
        ThrowFailure(E_NOTIMPL);
        break;
    }

    ThrowFailure(m_spVideoDevice->CheckFeatureSupport(FeatureVideo, pFeatureSupportData, FeatureSupportDataSize));

    if (FeatureVideo == D3D12_FEATURE_VIDEO_PROCESS_MAX_INPUT_STREAMS)
    {
        if (FeatureSupportDataSize == sizeof(D3D12_FEATURE_DATA_VIDEO_PROCESS_MAX_INPUT_STREAMS))
        {
            D3D12_FEATURE_DATA_VIDEO_PROCESS_MAX_INPUT_STREAMS *pMaxInputStreamsData = static_cast<D3D12_FEATURE_DATA_VIDEO_PROCESS_MAX_INPUT_STREAMS*>(pFeatureSupportData);
            if (pMaxInputStreamsData->MaxInputStreams < MIN_SUPPORTED_INPUT_STREAMS_VIA_EMULATION)
            {
                pMaxInputStreamsData->MaxInputStreams = MIN_SUPPORTED_INPUT_STREAMS_VIA_EMULATION;
            }
        }
        else
        {
            ThrowFailure(E_INVALIDARG);
        }
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Queries reference-frame requirements for one input/output frame-rate pair and folds them into
// running maxima.
//
// referenceInfo   - query structure; its frame-rate fields are overwritten, the rest is used as given.
// pastFrames /
// futureFrames    - raised to the reported PastFrames / FutureFrames when those are larger.
// A failed query is not an error here (best effort): the maxima are simply left untouched.
_Use_decl_annotations_
void VideoProcessEnum::UpdateReferenceInfo(D3D12_FEATURE_DATA_VIDEO_PROCESS_REFERENCE_INFO &referenceInfo, DXGI_RATIONAL &inputFrameRate, DXGI_RATIONAL &outputFrameRate, UINT &pastFrames, UINT &futureFrames)
{
    referenceInfo.InputFrameRate = inputFrameRate;
    referenceInfo.OutputFrameRate = outputFrameRate;

    if (FAILED(m_spVideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_PROCESS_REFERENCE_INFO, &referenceInfo, sizeof(referenceInfo))))
    {
        // Output fields are not meaningful after a failed query; do not let them affect the maxima.
        return;
    }

    if (referenceInfo.PastFrames > pastFrames)
    {
        pastFrames = referenceInfo.PastFrames;
    }
    if (referenceInfo.FutureFrames > futureFrames)
    {
        futureFrames = referenceInfo.FutureFrames;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Returns true when the device reports the configuration as supported and, if an output size is
// given (either dimension non-zero), IsScaleSupported accepts it. Throws if the query itself fails.
//
// The device writes its answer into dx12Support even though it is passed as a const reference;
// callers in this file read those output fields afterwards, so the argument must refer to a
// non-const object.
_Use_decl_annotations_
bool VideoProcessEnum::IsSupported(const D3D12_FEATURE_DATA_VIDEO_PROCESS_SUPPORT &dx12Support, UINT outputWidth = 0, UINT outputHeight = 0)
{
    ThrowFailure(m_spVideoDevice->CheckFeatureSupport(D3D12_FEATURE_VIDEO_PROCESS_SUPPORT, const_cast<D3D12_FEATURE_DATA_VIDEO_PROCESS_SUPPORT*>(&dx12Support), sizeof(dx12Support)));
    if ((dx12Support.SupportFlags & D3D12_VIDEO_PROCESS_SUPPORT_FLAG_SUPPORTED) == D3D12_VIDEO_PROCESS_SUPPORT_FLAG_SUPPORTED)
    {
        if ((outputWidth == 0 && outputHeight == 0) ||
            IsScaleSupported(dx12Support.ScaleSupport, outputWidth, outputHeight))
        {
            return true;
        }
    }
    return false;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Walks the cached support tuples (m_vpCapsSupportTuples) and returns:
//  - the largest past/future reference frame counts reported for the tuple's own frame rates and
//    for a fixed list of common rate pairs, and
//  - whether frame rate conversion is supported (a tuple with differing in/out rates, or any of
//    the common rate pairs passing IsSupported).
ReferenceInfo VideoProcessEnum::UpdateReferenceInfo(D3D12_VIDEO_PROCESS_DEINTERLACE_FLAGS DeinterlaceSupport)
{
    //
    // for each of the supported tuples, get sample support for frame rate conversion/past and future frames required
    //
    FrameRatePair frameRatePairs[] =
    {
        { { 30, 1 },{ 60, 1 } },
        { { 30000, 1001 },{ 60000, 1001 } },
        { { 30000, 1001 },{ 60, 1 } },
        { { 30, 1 },{ 60000, 1001 } },
        { { 60, 1 },{ 30, 1 } },
        { { 30, 1 },{ 24, 1 } },
        { { 24, 1 },{ 30, 1 } }
    };
    ReferenceInfo updateResults;
    // TODO: inverse telecine? we could do several rates and verify the modes.

    for (auto& vpSupportTuple : m_vpCapsSupportTuples)
    {
        // past/future frames
        {
            D3D12_FEATURE_DATA_VIDEO_PROCESS_REFERENCE_INFO referenceInfo = {};
            // Query the same node every other feature query in this class targets.
            referenceInfo.NodeIndex = m_pParent->GetNodeIndex();
            referenceInfo.DeinterlaceMode = DeinterlaceSupport;
            referenceInfo.Filters = vpSupportTuple.dx12Support.FilterSupport;
            referenceInfo.FeatureSupport = vpSupportTuple.dx12Support.FeatureSupport;
            referenceInfo.EnableAutoProcessing = (vpSupportTuple.dx12Support.AutoProcessingSupport != 0) ? TRUE : FALSE;

            UpdateReferenceInfo(referenceInfo, vpSupportTuple.dx12Support.InputFrameRate, vpSupportTuple.dx12Support.OutputFrameRate, updateResults.pastFrames, updateResults.futureFrames);
            for (auto& frameRate : frameRatePairs)
            {
                UpdateReferenceInfo(referenceInfo, frameRate.Input, frameRate.Output, updateResults.pastFrames, updateResults.futureFrames);
            }
        }

        // and now, we do frame rates conversion support, adding the optional input frame rates
        if (!updateResults.frameRateConversionSupported)
        {
            if (vpSupportTuple.dx12Support.InputFrameRate.Numerator != vpSupportTuple.dx12Support.OutputFrameRate.Numerator ||
                vpSupportTuple.dx12Support.InputFrameRate.Denominator != vpSupportTuple.dx12Support.OutputFrameRate.Denominator)
            {
                updateResults.frameRateConversionSupported = true;
            }
            else
            {
                for (auto& frameRate : frameRatePairs)
                {
                    D3D12_FEATURE_DATA_VIDEO_PROCESS_SUPPORT frameRateConversionSupport = vpSupportTuple.dx12Support;
                    frameRateConversionSupport.InputFrameRate = frameRate.Input;
                    frameRateConversionSupport.OutputFrameRate = frameRate.Output;
                    if (IsSupported(frameRateConversionSupport))
                    {
                        updateResults.frameRateConversionSupported = true;
                        break;
                    }
                }
            }
        }
    }
    return updateResults;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Probes the device with a fixed set of RGB and YUV format/color-space combinations built from
// args and caches the results:
//  - m_vpCapsSupportTuples    : every supported combination plus its conversion caps (appended),
//  - m_deinterlaceFlags       : union of the deinterlace support of those combinations,
//  - m_autoprocessingSupported: set when any supported RGB combination reports auto processing,
//  - m_vpInputFormats / m_vpOutputFormats: distinct formats seen in supported input/output pairs.
// Supported NV12 combinations with an interlaced field type always get the BOB flag added.
// Throws if a device query fails.
_Use_decl_annotations_
void VideoProcessEnum::CacheVideoProcessInfo(VIDEO_PROCESS_ENUM_ARGS &args)
{
    VIDEO_PROCESS_SUPPORT vpRGBSupportArray[] =
    {
        {
            m_pParent->GetNodeIndex(),
            { args.InputWidth, args.InputHeight,{ DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709 } },
            args.InputFieldType,
            D3D12_VIDEO_FRAME_STEREO_FORMAT_NONE,
            args.InputFrameRate,
            { DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709 },
            D3D12_VIDEO_FRAME_STEREO_FORMAT_NONE,
            args.OutputFrameRate
        },
        {
            m_pParent->GetNodeIndex(),
            { args.InputWidth, args.InputHeight,{ DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709 } },
            args.InputFieldType,
            D3D12_VIDEO_FRAME_STEREO_FORMAT_NONE,
            args.InputFrameRate,
            { DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709 },
            D3D12_VIDEO_FRAME_STEREO_FORMAT_NONE,
            args.OutputFrameRate
        }
    };

    struct {
        DXGI_COLOR_SPACE_TYPE inputColorSpace;
        DXGI_COLOR_SPACE_TYPE outputColorSpace;
        VIDEO_PROCESS_CONVERSION_CAPS conversionCap;
    } colorSpaceTuplesRGB[] =
    {
        // same color space for input & output
        { DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709, DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709, VIDEO_PROCESS_CONVERSION_CAPS_NONE },
        { DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709, DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709, VIDEO_PROCESS_CONVERSION_CAPS_NONE },
        { DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020, DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020, VIDEO_PROCESS_CONVERSION_CAPS_NONE },
        { DXGI_COLOR_SPACE_RGB_STUDIO_G22_NONE_P709, DXGI_COLOR_SPACE_RGB_STUDIO_G22_NONE_P709, VIDEO_PROCESS_CONVERSION_CAPS_NONE },
        { DXGI_COLOR_SPACE_RGB_STUDIO_G22_NONE_P2020, DXGI_COLOR_SPACE_RGB_STUDIO_G22_NONE_P2020, VIDEO_PROCESS_CONVERSION_CAPS_NONE },
        { DXGI_COLOR_SPACE_RGB_STUDIO_G2084_NONE_P2020, DXGI_COLOR_SPACE_RGB_STUDIO_G2084_NONE_P2020, VIDEO_PROCESS_CONVERSION_CAPS_NONE },

        // full <-> studio conversions
        { DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709, DXGI_COLOR_SPACE_RGB_STUDIO_G22_NONE_P709, VIDEO_PROCESS_CONVERSION_CAPS_RGB_RANGE_CONVERSION },
        { DXGI_COLOR_SPACE_RGB_STUDIO_G22_NONE_P709, DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709, VIDEO_PROCESS_CONVERSION_CAPS_RGB_RANGE_CONVERSION },
    };

    VIDEO_PROCESS_SUPPORT vpYUVSupportArray[] =
    {
        {
            m_pParent->GetNodeIndex(),
            { args.InputWidth, args.InputHeight,{ DXGI_FORMAT_NV12, DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P709 } },
            args.InputFieldType,
            D3D12_VIDEO_FRAME_STEREO_FORMAT_NONE,
            args.InputFrameRate,
            { DXGI_FORMAT_NV12, DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P709 },
            D3D12_VIDEO_FRAME_STEREO_FORMAT_NONE,
            args.OutputFrameRate
        }
    };

    struct {
        DXGI_COLOR_SPACE_TYPE inputColorSpace;
        DXGI_COLOR_SPACE_TYPE outputColorSpace;
        VIDEO_PROCESS_CONVERSION_CAPS conversionCap;
    } colorSpaceTuplesYUV[] =
    {
        // same color space for input & output
        { DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P709, DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P709, VIDEO_PROCESS_CONVERSION_CAPS_NOMINAL_RANGE },
        { DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P601, DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P601, VIDEO_PROCESS_CONVERSION_CAPS_NOMINAL_RANGE },
        { DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P709, DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P709, VIDEO_PROCESS_CONVERSION_CAPS_NONE },
        { DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P601, DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P601, VIDEO_PROCESS_CONVERSION_CAPS_NONE },

        // full <-> studio conversions
        { DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P709, DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P709, VIDEO_PROCESS_CONVERSION_CAPS_NONE },
        { DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P601, DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P601, VIDEO_PROCESS_CONVERSION_CAPS_NONE },
        { DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P709, DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P709, VIDEO_PROCESS_CONVERSION_CAPS_NONE },
        { DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P601, DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P601, VIDEO_PROCESS_CONVERSION_CAPS_NONE },

        // 601 <-> 709 conversions
        { DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P709, DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P601, VIDEO_PROCESS_CONVERSION_CAPS_YCbCr_MATRIX_CONVERSION },
        { DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P601, DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P709, VIDEO_PROCESS_CONVERSION_CAPS_YCbCr_MATRIX_CONVERSION },
        { DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P601, DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P709, VIDEO_PROCESS_CONVERSION_CAPS_YCbCr_MATRIX_CONVERSION },
        { DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P709, DXGI_COLOR_SPACE_YCBCR_STUDIO_G22_LEFT_P601, VIDEO_PROCESS_CONVERSION_CAPS_YCbCr_MATRIX_CONVERSION },
    };

    //
    // Fill VP caps support sample tuple array
    //
    m_deinterlaceFlags = D3D12_VIDEO_PROCESS_DEINTERLACE_FLAG_NONE;
    m_autoprocessingSupported = false;
    m_vpCapsSupportTuples.reserve(m_vpCapsSupportTuples.size() + _countof(colorSpaceTuplesRGB) * _countof(vpRGBSupportArray) + _countof(colorSpaceTuplesYUV) * _countof(vpYUVSupportArray));

    for (auto& vpRGBSupport : vpRGBSupportArray)
    {
        for (auto& tuple : colorSpaceTuplesRGB)
        {
            vpRGBSupport.dx12Support.InputSample.Format.ColorSpace = tuple.inputColorSpace;
            vpRGBSupport.dx12Support.OutputFormat.ColorSpace = tuple.outputColorSpace;
            // IsSupported also fills the output fields of dx12Support that are read below.
            if (IsSupported(vpRGBSupport.dx12Support, args.OutputWidth, args.OutputHeight))
            {
                vpRGBSupport.colorConversionCaps = tuple.conversionCap;
                m_vpCapsSupportTuples.push_back(vpRGBSupport);
                m_deinterlaceFlags |= vpRGBSupport.dx12Support.DeinterlaceSupport;
                if (vpRGBSupport.dx12Support.AutoProcessingSupport != D3D12_VIDEO_PROCESS_AUTO_PROCESSING_FLAG_NONE)
                {
                    m_autoprocessingSupported = true;
                }
            }
        }
    }

    for (auto& vpYUVSupport : vpYUVSupportArray)
    {
        for (auto& tuple : colorSpaceTuplesYUV)
        {
            vpYUVSupport.dx12Support.InputSample.Format.ColorSpace = tuple.inputColorSpace;
            vpYUVSupport.dx12Support.OutputFormat.ColorSpace = tuple.outputColorSpace;
            if (IsSupported(vpYUVSupport.dx12Support, args.OutputWidth, args.OutputHeight))
            {
                vpYUVSupport.colorConversionCaps = tuple.conversionCap;
                // Interlaced NV12 always advertises BOB, whatever the device itself reported.
                vpYUVSupport.dx12Support.DeinterlaceSupport |= args.InputFieldType == D3D12_VIDEO_FIELD_TYPE_NONE ? D3D12_VIDEO_PROCESS_DEINTERLACE_FLAG_NONE : D3D12_VIDEO_PROCESS_DEINTERLACE_FLAG_BOB;
                m_vpCapsSupportTuples.push_back(vpYUVSupport);
                m_deinterlaceFlags |= vpYUVSupport.dx12Support.DeinterlaceSupport;
            }
        }
    }

    //
    // Get sample list of possible input and output VP formats
    //
    const D3D12_VIDEO_FORMAT videoFormats[] =
    {
        { DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709 },
        { DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709 },
        { DXGI_FORMAT_B8G8R8X8_UNORM, DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709 },
        { DXGI_FORMAT_NV12, DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P709 },
        { DXGI_FORMAT_P010, DXGI_COLOR_SPACE_YCBCR_STUDIO_G2084_LEFT_P2020 },
        { DXGI_FORMAT_P016, DXGI_COLOR_SPACE_YCBCR_STUDIO_G2084_LEFT_P2020 },
        { DXGI_FORMAT_420_OPAQUE, DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P709 },
        { DXGI_FORMAT_YUY2, DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P709 },
        { DXGI_FORMAT_AYUV, DXGI_COLOR_SPACE_YCBCR_FULL_G22_LEFT_P709 },
        { DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020 },
    };

    D3D12_FEATURE_DATA_VIDEO_PROCESS_SUPPORT dx12Support =
    {
        m_pParent->GetNodeIndex(),
        { args.InputWidth, args.InputHeight,{ DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709 } },
        args.InputFieldType,
        D3D12_VIDEO_FRAME_STEREO_FORMAT_NONE,
        args.InputFrameRate,
        { DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709 },
        D3D12_VIDEO_FRAME_STEREO_FORMAT_NONE,
        args.OutputFrameRate
    };

    m_vpInputFormats.reserve(_countof(videoFormats));
    m_vpOutputFormats.reserve(_countof(videoFormats));
    for (auto& inputVideoFormat : videoFormats)
    {
        for (auto& outputVideoFormat : videoFormats)
        {
            dx12Support.InputSample.Format = inputVideoFormat;
            dx12Support.OutputFormat = outputVideoFormat;
            if (IsSupported(dx12Support, args.OutputWidth, args.OutputHeight))
            {
                if (std::find(m_vpInputFormats.begin(), m_vpInputFormats.end(), inputVideoFormat.Format) == m_vpInputFormats.end())
                {
                    assert((m_deinterlaceFlags & D3D12_VIDEO_PROCESS_DEINTERLACE_FLAG_BOB) != 0 || inputVideoFormat.Format != DXGI_FORMAT_NV12 || args.InputFieldType == D3D12_VIDEO_FIELD_TYPE_NONE);
                    m_vpInputFormats.push_back(inputVideoFormat.Format);
                }
                if (std::find(m_vpOutputFormats.begin(), m_vpOutputFormats.end(), outputVideoFormat.Format) == m_vpOutputFormats.end())
                {
                    m_vpOutputFormats.push_back(outputVideoFormat.Format);
                }
            }
        }
    }
}
};

================================================
FILE: src/VideoReferenceDataManager.cpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include "pch.h"

namespace D3D12TranslationLayer
{
//----------------------------------------------------------------------------------------------------------------------------------
// Maps a decode profile type to the "invalid picture index" sentinel used by that codec family.
// Unknown profile types yield 0.
static UINT16 GetInvalidReferenceIndex(VIDEO_DECODE_PROFILE_TYPE DecodeProfileType)
{
    assert(DecodeProfileType <= VIDEO_DECODE_PROFILE_TYPE_MAX_VALID);
    // Fails to compile when a profile type is added, as a reminder to extend the switch below.
    static_assert(VIDEO_DECODE_PROFILE_TYPE_H264_MVC + 1 == VIDEO_DECODE_PROFILE_TYPE_MAX_VALID);

    switch (DecodeProfileType)
    {
    case VIDEO_DECODE_PROFILE_TYPE_VC1:
    case VIDEO_DECODE_PROFILE_TYPE_MPEG2:
    case VIDEO_DECODE_PROFILE_TYPE_MPEG4PT2:
        return DXVA_INVALID_PICTURE_INDEX;
    case VIDEO_DECODE_PROFILE_TYPE_H264:
    case VIDEO_DECODE_PROFILE_TYPE_H264_MVC:
        return H264_INVALID_PICTURE_INDEX;
    case VIDEO_DECODE_PROFILE_TYPE_HEVC:
        return HEVC_INVALID_PICTURE_INDEX;
    case VIDEO_DECODE_PROFILE_TYPE_VP8:
    case VIDEO_DECODE_PROFILE_TYPE_VP9:
        return VPX_INVALID_PICTURE_INDEX;
    default:
        return 0;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Binds the manager to its immediate context and fixes the invalid-index sentinel for the profile.
_Use_decl_annotations_
ReferenceDataManager::ReferenceDataManager(
    ImmediateContext *pImmediateContext,
    VIDEO_DECODE_PROFILE_TYPE profileType)
    : m_pImmediateContext(pImmediateContext)
    , m_invalidIndex(GetInvalidReferenceIndex(profileType))
{}

//----------------------------------------------------------------------------------------------------------------------------------
// Linear search for the slot whose originalIndex equals the given caller index.
// Returns the slot number, or m_invalidIndex when no slot matches. Passing m_invalidIndex
// itself therefore finds the first unused slot.
UINT16 ReferenceDataManager::FindRemappedIndex(UINT16 originalIndex)
{
    // Check if the index is already mapped.
    assert(referenceDatas.size() < MAXUINT16);
    for (UINT16 remappedIndex = 0; remappedIndex < static_cast<UINT16>(referenceDatas.size()); remappedIndex++)
    {
        if (referenceDatas[remappedIndex].originalIndex == originalIndex)
        {
            return remappedIndex;
        }
    }

    return m_invalidIndex;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Resolves a caller reference index to its slot and publishes that slot's decoder heap, texture
// and subresource into the parameter arrays, transitioning the reference to DECODE_READ.
// An unmapped index (or one that maps to the current output) falls back to the current output
// slot without a state transition. Returns the slot, or m_invalidIndex when index is the sentinel.
UINT16 ReferenceDataManager::UpdateEntry(UINT16 index)
{
    UINT16 remappedIndex = m_invalidIndex;

    if (index != m_invalidIndex)
    {
        remappedIndex = FindRemappedIndex(index);

        bool fTransitionSubresource = true;
        if (   remappedIndex == m_invalidIndex
            || remappedIndex == m_currentOutputIndex)
        {
            // Caller specified an invalid reference index.  Remap it to the current
            // picture index to avoid crashing and still attempt to decode.
            if (g_hTracelogging)
            {
                TraceLoggingWrite(g_hTracelogging,
                    "Decode - Invalid Reference Index",
                    TraceLoggingValue(index, "Index"),
                    TraceLoggingValue(m_currentOutputIndex, "OutputIndex"));
            }

            remappedIndex = m_currentOutputIndex;

            // The output resource has already been transitioned to the DECODE_WRITE state when
            // set as the current output.  For use as a reference, the resource should be in a DECODE_READ state,
            // but we can't express both so leave it in the WRITE state.  This is an error condition, so this is
            // an attempt to keep the decoder producing output until we start getting correct reference indices again.
            fTransitionSubresource = false;
        }

        ReferenceData& referenceData = referenceDatas[remappedIndex];

        decoderHeapsParameter[remappedIndex] = referenceData.decoderHeap->GetForUse(COMMAND_LIST_TYPE::VIDEO_DECODE);
        if (fTransitionSubresource)
        {
            TransitionReference(referenceData, D3D12_RESOURCE_STATE_VIDEO_DECODE_READ);
        }
        textures[remappedIndex] = referenceData.referenceTexture->GetUnderlyingResource();
        texturesSubresources[remappedIndex] = referenceData.subresourceIndex;
    }

    return remappedIndex;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Lookup-only counterpart of UpdateEntry: returns the slot for index without touching any
// state, falling back to the current output slot when the index is not mapped.
UINT16 ReferenceDataManager::GetUpdatedEntry(UINT16 index)
{
    UINT16 remappedIndex = m_invalidIndex;

    if (index != m_invalidIndex)
    {
        remappedIndex = FindRemappedIndex(index);

        if (remappedIndex == m_invalidIndex)
        {
            // Caller specified an invalid reference index.  Remap it to the current
            // picture index to avoid crashing and still attempt to decode.
            remappedIndex = m_currentOutputIndex;
        }
    }

    return remappedIndex;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Reserves a slot for the picture about to be decoded and records it as the current output.
// Slot choice, in order: the slot already mapped to index; the slot numbered index if unused;
// any unused slot. Throws E_INVALIDARG when no slot is free.
// Returns the chosen slot.
//
// NOTE(review): the template argument of the decoderHeap parameter was lost in this copy of the
// file; VideoDecoderHeap is inferred - confirm against the declaration in the header.
_Use_decl_annotations_
UINT16 ReferenceDataManager::StoreFutureReference(UINT16 index, std::shared_ptr<VideoDecoderHeap>& decoderHeap, Resource* pTexture2D, UINT subresourceIndex)
{
    // Check if the index was in use.
    UINT16 remappedIndex = FindRemappedIndex(index);

    if (remappedIndex == m_invalidIndex)
    {
        // If not already mapped, see if the same index in the remapped space is available.
        if (   index < referenceDatas.size()
            && referenceDatas[index].originalIndex == m_invalidIndex)
        {
            remappedIndex = index;
        }
    }

    if (remappedIndex == m_invalidIndex)
    {
        // The current output index was not used last frame.  Get an unused entry.
        remappedIndex = FindRemappedIndex(m_invalidIndex);
    }

    if (remappedIndex == m_invalidIndex)
    {
        // No unused entry exists.  Indicates a problem with MaxDPB.
        if (g_hTracelogging)
        {
            TraceLoggingWrite(g_hTracelogging,
                "Decode - No available reference map entry for output.");
        }
        ThrowFailure(E_INVALIDARG);
    }

    ReferenceData& referenceData = referenceDatas[remappedIndex];

    // Set the index as the key in this map entry.
    referenceData.originalIndex = index;

    referenceData.decoderHeap = decoderHeap;

    // When IsReferenceOnly is true, then the translation layer is managing references
    // either because the layout is incompatible with other texture usage (REFERENCE_ONLY), and/or
    // because decode output conversion is enabled.
    if (!IsReferenceOnly())
    {
        referenceData.referenceTexture = pTexture2D;
        referenceData.subresourceIndex = subresourceIndex;
    }

    // Store the index to use for error handling when caller specifies an invalid reference index.
    m_currentOutputIndex = remappedIndex;

    return remappedIndex;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Reference-only mode: transitions the current output slot's reference texture to DECODE_WRITE
// and returns its underlying resource (no reference taken) and subresource index.
_Use_decl_annotations_
void ReferenceDataManager::TransitionReferenceOnlyOutput(ID3D12Resource*& pOutputReferenceNoRef, UINT& OutputSubresource)
{
    assert(IsReferenceOnly());
    ReferenceData& referenceData = referenceDatas[m_currentOutputIndex];

    TransitionReference(referenceData, D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE);
    pOutputReferenceNoRef = referenceData.referenceTexture->GetUnderlyingResource();
    OutputSubresource = referenceData.subresourceIndex;
}

//----------------------------------------------------------------------------------------------------------------------------------
// Flags the slot mapped to index as used so ReleaseUnusedReferences keeps it.
// Sentinel or unmapped indices are ignored.
void ReferenceDataManager::MarkReferenceInUse(UINT16 index)
{
    if (index != m_invalidIndex)
    {
        UINT16 remappedIndex = FindRemappedIndex(index);
        if (remappedIndex != m_invalidIndex)
        {
            referenceDatas[remappedIndex].fUsed = true;
        }
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Frees every slot not flagged as used: drops its decoder heap, marks it unmapped and, unless
// the manager owns the reference textures (reference-only mode), forgets its texture.
void ReferenceDataManager::ReleaseUnusedReferences()
{
    for (ReferenceData& referenceData : referenceDatas)
    {
        if (!referenceData.fUsed)
        {
            referenceData.decoderHeap = nullptr;
            if (!IsReferenceOnly())
            {
                referenceData.referenceTexture = nullptr;
                referenceData.subresourceIndex = 0;
            }
            referenceData.originalIndex = m_invalidIndex;
        }
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Resizes the tracking arrays to dpb slots, clears usage/parameter state and releases unused
// references. When pReferenceOnly is non-null, (re)creates translation-layer-owned reference
// textures: one single-slice texture per slot (fArrayOfTexture), or one texture array of dpb
// slices shared by all slots with subresourceIndex = slot number.
// May throw from resource creation.
_Use_decl_annotations_
void ReferenceDataManager::Resize(UINT16 dpb, ReferenceOnlyDesc* pReferenceOnly, bool fArrayOfTexture)
{
    m_fArrayOfTexture = fArrayOfTexture;

    ResizeDataStructures(dpb);
    ResetInternalTrackingReferenceUsage();
    ResetReferenceFramesInformation();
    ReleaseUnusedReferences();

    m_fReferenceOnly = pReferenceOnly != nullptr;

    if (m_fReferenceOnly)
    {
        ResourceCreationArgs ResourceArgs = {};

        if (fArrayOfTexture)
        {
            ResourceArgs.m_desc12 = CD3DX12_RESOURCE_DESC::Tex2D(pReferenceOnly->Format, pReferenceOnly->Width, pReferenceOnly->Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE);
            ResourceArgs.m_appDesc = AppResourceDesc(ResourceArgs.m_desc12, RESOURCE_USAGE_DEFAULT, RESOURCE_CPU_ACCESS_NONE, RESOURCE_BIND_DECODER);
            UINT64 resourceSize = 0;
            m_pImmediateContext->m_pDevice12->GetCopyableFootprints(&ResourceArgs.m_desc12, 0, 1, 0, nullptr, nullptr, nullptr, &resourceSize);
            ResourceArgs.m_heapDesc = CD3DX12_HEAP_DESC(resourceSize, m_pImmediateContext->GetHeapProperties(D3D12_HEAP_TYPE_DEFAULT));

            for (ReferenceData& referenceData : referenceDatas)
            {
                // Reuse an existing texture only when its creation arguments are bytewise identical.
                if (   !referenceData.referenceOnlyTexture
                    || 0 != memcmp(referenceData.referenceOnlyTexture->Parent(), &ResourceArgs, sizeof(ResourceCreationArgs)))
                {
                    referenceData.referenceOnlyTexture = Resource::CreateResource(m_pImmediateContext, ResourceArgs, ResourceAllocationContext::ImmediateContextThreadLongLived);
                    assert(0 == memcmp(referenceData.referenceOnlyTexture->Parent(), &ResourceArgs, sizeof(ResourceCreationArgs)));
                }

                referenceData.referenceTexture = referenceData.referenceOnlyTexture.get();
                referenceData.subresourceIndex = 0u;
            }
        }
        else
        {
            ResourceArgs.m_desc12 = CD3DX12_RESOURCE_DESC::Tex2D(pReferenceOnly->Format, pReferenceOnly->Width, pReferenceOnly->Height, dpb, 1, 1, 0, D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE);
            ResourceArgs.m_appDesc = AppResourceDesc(ResourceArgs.m_desc12, RESOURCE_USAGE_DEFAULT, RESOURCE_CPU_ACCESS_NONE, RESOURCE_BIND_DECODER);
            UINT64 resourceSize = 0;
            m_pImmediateContext->m_pDevice12->GetCopyableFootprints(&ResourceArgs.m_desc12, 0, 1, 0, nullptr, nullptr, nullptr, &resourceSize);
            ResourceArgs.m_heapDesc = CD3DX12_HEAP_DESC(resourceSize, m_pImmediateContext->GetHeapProperties(D3D12_HEAP_TYPE_DEFAULT));

            unique_comptr<Resource> spReferenceOnlyTextureArray = Resource::CreateResource(m_pImmediateContext, ResourceArgs, ResourceAllocationContext::ImmediateContextThreadLongLived);

            for (size_t i = 0; i < referenceDatas.size(); i++)
            {
                referenceDatas[i].referenceOnlyTexture = spReferenceOnlyTextureArray.get();
                referenceDatas[i].referenceTexture = spReferenceOnlyTextureArray.get();
                referenceDatas[i].subresourceIndex = static_cast<UINT>(i);
            }
        }
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Resizes all four parallel per-slot arrays to the same length.
void ReferenceDataManager::ResizeDataStructures(UINT size)
{
    textures.resize(size);
    texturesSubresources.resize(size);
    decoderHeapsParameter.resize(size);
    referenceDatas.resize(size);
}

//----------------------------------------------------------------------------------------------------------------------------------
// Clears the per-slot parameter arrays (textures, subresources, decoder heaps).
void ReferenceDataManager::ResetReferenceFramesInformation()
{
    for (UINT index = 0; index < Size(); index++)
    {
        textures[index] = nullptr;
        texturesSubresources[index] = 0;
        decoderHeapsParameter[index] = nullptr;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Clears the fUsed flag on every slot.
void ReferenceDataManager::ResetInternalTrackingReferenceUsage()
{
    for (UINT index = 0; index < Size(); index++)
    {
        referenceDatas[index].fUsed = false;
    }
}

//----------------------------------------------------------------------------------------------------------------------------------
// Transitions the subresources of the reference's array slice (mip 0) to decodeState on the
// video decode command list type.
_Use_decl_annotations_
void ReferenceDataManager::TransitionReference(ReferenceData& referenceData, D3D12_RESOURCE_STATES decodeState)
{
    AppResourceDesc* pAppDesc = referenceData.referenceTexture->AppDesc();
    VIDEO_PROCESSOR_INPUT_VIEW_DESC_INTERNAL viewDesc = { pAppDesc->Format(), /*MipSlice=*/ 0, /*ArraySlice=*/ referenceData.subresourceIndex};
    const UINT8 MipLevels = pAppDesc->MipLevels();
    const UINT16 ArraySize = pAppDesc->ArraySize();
    const UINT8 PlaneCount = (referenceData.referenceTexture->SubresourceMultiplier() * pAppDesc->NonOpaquePlaneCount());

    CViewSubresourceSubset SubresourceSubset(viewDesc, MipLevels, ArraySize, PlaneCount);

    m_pImmediateContext->GetResourceStateManager().TransitionSubresources(
        referenceData.referenceTexture,
        SubresourceSubset,
        decodeState,
        COMMAND_LIST_TYPE::VIDEO_DECODE);
}
};

================================================
FILE: src/View.cpp
================================================
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include "pch.h"

namespace D3D12TranslationLayer
{
    // Forwards a command-list usage notification to the resource behind this view.
    // Does nothing when the view has no resource attached.
    void ViewBase::UsedInCommandList(COMMAND_LIST_TYPE commandListType, UINT64 id)
    {
        if (m_pResource)
        {
            m_pResource->UsedInCommandList(commandListType, id);
        }
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Builds the base view from the D3D12 description and remembers the D3D11 UAV flags,
    // which EnsureCounterResource() later checks for the Append/Counter bits.
    UAV::UAV(ImmediateContext* pDevice, const TTranslationLayerDesc &Desc, Resource &ViewResource) noexcept(false)
        : TUAV(pDevice, Desc.m_Desc12, ViewResource)
        , m_D3D11UAVFlags(Desc.m_D3D11UAVFlags)
    {
    }

    // Hands the counter resource (which may never have been created) to the deferred deletion queue.
    UAV::~UAV() noexcept(false)
    {
        AddToDeferredDeletionQueue(m_pCounterResource);
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Lazily creates the hidden 4-byte counter buffer for Append/Counter buffer UAVs,
    // initializes it to zero and leaves it in the UNORDERED_ACCESS state.
    // Throws (via ThrowFailure / CopyDataToBuffer) if creation or initialization fails;
    // in that case m_pCounterResource is left unset.
    void UAV::EnsureCounterResource() noexcept(false)
    {
        // This is called by an immediate context operation
        // to ensure that access to the command list is synchronized
        const D3D12_UNORDERED_ACCESS_VIEW_DESC& Desc12 = GetDesc12();

        // A counter is only required for buffer UAVs that have either the Append or the Counter bit set,
        // and it only has to be created once.
        const UINT CounterFlags = D3D11_BUFFER_UAV_FLAG_APPEND | D3D11_BUFFER_UAV_FLAG_COUNTER;
        const bool bNeedsCounter =
            (D3D12_UAV_DIMENSION_BUFFER == Desc12.ViewDimension) && ((m_D3D11UAVFlags & CounterFlags) != 0);
        if (m_pCounterResource || !bNeedsCounter)
        {
            return;
        }

        unique_comptr<ID3D12Resource> pCounterResource;

        // The counter is a single UINT in a default heap, created directly in the COPY_DEST state
        // so that the initial value can be copied into it below.
        D3D12_HEAP_PROPERTIES HeapProp = m_pParent->GetHeapProperties(D3D12_HEAP_TYPE_DEFAULT);
        D3D12_RESOURCE_DESC ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(
            sizeof(UINT),
            D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);

        HRESULT hr = m_pParent->m_pDevice12->CreateCommittedResource(
            &HeapProp,
            D3D12_HEAP_FLAG_NONE,
            &ResourceDesc,
            D3D12_RESOURCE_STATE_COPY_DEST,
            nullptr,
            IID_PPV_ARGS(&pCounterResource));
        if (FAILED(hr))
        {
            ThrowFailure(hr);
        }

        // Initialize the counter to 0
        UINT InitialData = 0;
        UsedInCommandList(COMMAND_LIST_TYPE::GRAPHICS, m_pParent->GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS));
        m_pParent->CopyDataToBuffer(
            pCounterResource.get(),
            0,
            &InitialData,
            sizeof(InitialData)); // throw( _com_error )

        // No more failures after this point
        m_pCounterResource = std::move(pCounterResource);

        // Transition the counter to the UAV state
        D3D12_RESOURCE_BARRIER BarrierDesc = {};
        BarrierDesc.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
        BarrierDesc.Transition.pResource = m_pCounterResource.get();
        BarrierDesc.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
        BarrierDesc.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
        BarrierDesc.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;

        m_pParent->GetGraphicsCommandList()->ResourceBarrier(1, &BarrierDesc);
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Overwrites the hidden counter with Value. No-op when this UAV has no counter resource.
    void UAV::UpdateCounterValue(UINT Value)
    {
        // Early-out if this is not a counter UAV
        if (!m_pCounterResource)
        {
            return;
        }

        // Transition the counter to the CopyDest state for the duration of this function
        AutoTransition counterTransition(
            m_pParent->GetGraphicsCommandList(),
            m_pCounterResource.get(),
            D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
            D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
            D3D12_RESOURCE_STATE_COPY_DEST);

        UsedInCommandList(COMMAND_LIST_TYPE::GRAPHICS, m_pParent->GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS));

        // UpdateSubresource
        m_pParent->CopyDataToBuffer(
            m_pCounterResource.get(),
            0,
            &Value,
            sizeof(Value)); // throw( _com_error )
    }

    //----------------------------------------------------------------------------------------------------------------------------------
    // Records a copy of the current counter value (one UINT) into pDst at byte offset DstOffset.
    // No-op when this UAV has no counter resource.
    void UAV::CopyCounterToBuffer(ID3D12Resource* pDst, UINT DstOffset) noexcept
    {
        // Early-out if there was a previous error which caused the counter resource to not be allocated
        if (!m_pCounterResource)
        {
            return;
        }

        // Transition the counter to the CopySource state (and put it back at the end of this function)
        AutoTransition counterTransition(
            m_pParent->GetGraphicsCommandList(),
            m_pCounterResource.get(),
            D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
            D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
            D3D12_RESOURCE_STATE_COPY_SOURCE);

        UsedInCommandList(COMMAND_LIST_TYPE::GRAPHICS, m_pParent->GetCommandListID(COMMAND_LIST_TYPE::GRAPHICS));

        m_pParent->GetGraphicsCommandList()->CopyBufferRegion(
            pDst,
            DstOffset,
            m_pCounterResource.get(),
            0,
            sizeof(UINT));
    }
};