Repository: HansKristian-Work/dxil-spirv
Branch: master
Commit: 62dbb07f7715
Files: 2195
Total size: 9.0 MB

Directory structure:
gitextract_06b8vcw_/
├── .clang-format
├── .gitattributes
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── DESCRIPTORS.md
├── LICENSE.MIT
├── README.md
├── bc/
│   ├── CMakeLists.txt
│   ├── cast.hpp
│   ├── context.cpp
│   ├── context.hpp
│   ├── data_structures.hpp
│   ├── disassembler.cpp
│   ├── function.cpp
│   ├── function.hpp
│   ├── instruction.cpp
│   ├── instruction.hpp
│   ├── iterator.hpp
│   ├── metadata.cpp
│   ├── metadata.hpp
│   ├── module.cpp
│   ├── module.hpp
│   ├── module_dxbc_ir.cpp
│   ├── type.cpp
│   ├── type.hpp
│   ├── value.cpp
│   └── value.hpp
├── build_dxc.sh
├── cfg_structurizer.cpp
├── cfg_structurizer.hpp
├── checkout_dxc.sh
├── checkout_llvm.sh
├── copy_reference_shaders.py
├── debug/
│   ├── logging.cpp
│   └── logging.hpp
├── descriptor_qa.cpp
├── descriptor_qa.hpp
├── dxbc_spirv_sandbox.cpp
├── dxil-disasm.py
├── dxil.hpp
├── dxil_converter.cpp
├── dxil_converter.hpp
├── dxil_extract.cpp
├── dxil_parser.cpp
├── dxil_parser.hpp
├── dxil_spirv.cpp
├── dxil_spirv_c.cpp
├── dxil_spirv_c.h
├── external/
│   └── CMakeLists.txt
├── format_all.sh
├── ir.hpp
├── link.T
├── llvm_bitcode_parser.cpp
├── llvm_bitcode_parser.hpp
├── memory_stream.cpp
├── memory_stream.hpp
├── meson.build
├── misc/
│   └── structurize_test.cpp
├── node.cpp
├── node.hpp
├── node_pool.cpp
├── node_pool.hpp
├── opcodes/
│   ├── converter_impl.hpp
│   ├── dxil/
│   │   ├── dxil_ags.cpp
│   │   ├── dxil_ags.hpp
│   │   ├── dxil_arithmetic.cpp
│   │   ├── dxil_arithmetic.hpp
│   │   ├── dxil_buffer.cpp
│   │   ├── dxil_buffer.hpp
│   │   ├── dxil_common.cpp
│   │   ├── dxil_common.hpp
│   │   ├── dxil_compute.cpp
│   │   ├── dxil_compute.hpp
│   │   ├── dxil_geometry.cpp
│   │   ├── dxil_geometry.hpp
│   │   ├── dxil_mesh.cpp
│   │   ├── dxil_mesh.hpp
│   │   ├── dxil_nvapi.cpp
│   │   ├── dxil_nvapi.hpp
│   │   ├── dxil_pixel_ops.cpp
│   │   ├── dxil_pixel_ops.hpp
│   │   ├── dxil_ray_tracing.cpp
│   │   ├── dxil_ray_tracing.hpp
│   │   ├── dxil_resources.cpp
│   │   ├── dxil_resources.hpp
│   │   ├── dxil_sampling.cpp
│   │   ├── dxil_sampling.hpp
│   │   ├── dxil_tessellation.cpp
│   │   ├── dxil_tessellation.hpp
│   │   ├── dxil_waveops.cpp
│   │   ├── dxil_waveops.hpp
│   │   ├── dxil_workgraph.cpp
│   │   └── dxil_workgraph.hpp
│   ├── opcodes.hpp
│   ├── opcodes_dxil_builtins.cpp
│   ├── opcodes_dxil_builtins.hpp
│   ├── opcodes_llvm_builtins.cpp
│   └── opcodes_llvm_builtins.hpp
├── pkg-config/
│   └── dxil-spirv-c-shared.pc.in
├── reference/
│   └── shaders/
│       ├── ags/
│       │   ├── ags.ssbo.comp
│       │   ├── cs_constexpr_wmma_gep.sm66.full-wmma.ssbo.comp
│       │   ├── cs_constexpr_wmma_gep.sm66.ssbo.comp
│       │   ├── cs_wmma_alloca.sm66.ssbo.comp
│       │   ├── cs_wmma_copy_transpose_fp16.sm66.ssbo.comp
│       │   ├── cs_wmma_copy_transpose_fp16.sm66.ssbo.full-wmma.comp
│       │   ├── cs_wmma_extract_insert.sm66.ssbo.full-wmma.comp
│       │   ├── cs_wmma_f32_16x16x16_f16_quant_f16.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_f16_quant_f16_at.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_f16_quant_f16_bt.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_f16_quant_f16_ct.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_f16_quant_f16_ot.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_f16_quant_fp8.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_fp8.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_fp8.sm66.ssbo.full-wmma.comp
│       │   ├── cs_wmma_f32_16x16x16_fp8_quant_f16.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_fp8_quant_f16_strided.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_fp8_quant_f16_strided_transpose.sm66.ssbo.comp
│       │   ├── cs_wmma_f32_16x16x16_fp8_quant_f32.sm66.ssbo.comp
│       │   ├── cs_wmma_fp16_fp8_conversions.sm66.ssbo.full-wmma.comp
│       │   ├── cs_wmma_fp32_fp16_conversions.sm66.ssbo.full-wmma.comp
│       │   ├── cs_wmma_fp32_fp8_conversions.sm66.ssbo.nv-coopmat2.comp
│       │   ├── cs_wmma_fp8_fp32_conversions.sm66.ssbo.full-wmma.comp
│       │   ├── cs_wmma_lds_transpose.sm66.ssbo.comp
│       │   ├── cs_wmma_matrix_length.sm66.ssbo.comp
│       │   ├── cs_wmma_store_phi.full-wmma.sm66.ssbo.comp
│       │   └── cs_wmma_store_phi.sm66.ssbo.comp
│       ├── alloca-opts/
│       │   ├── bad-stride.frag
│       │   ├── double-array-load.frag
│       │   ├── float4-array-load.bindless.frag
│       │   ├── float4-array-load.bindless.root-constants.frag
│       │   ├── float4-array-load.frag
│       │   ├── float4-array-load.root-constant.frag
│       │   ├── float4-array-load.root-descriptor.frag
│       │   ├── float4-array-load.root-descriptor.root-constants.frag
│       │   ├── load-different.frag
│       │   ├── local-root-constants.local-root-signature.rgen
│       │   ├── matrix-load.frag
│       │   ├── missing-first.frag
│       │   ├── missing-last-element.frag
│       │   ├── out-of-order-load.frag
│       │   ├── store-after-load.frag
│       │   └── uint4-array-load.frag
│       ├── asm/
│       │   ├── bfi.bc.dxil
│       │   ├── cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil
│       │   ├── cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil
│       │   ├── constant-struct-aggregate.bc.dxil
│       │   ├── control-flow-multi-break-with-non-idom-loop-header.dxil
│       │   ├── ibfe.bc.dxil
│       │   └── ubfe.bc.dxil
│       ├── auto-barrier/
│       │   ├── complex-loop.auto-group-shared-barrier.comp
│       │   ├── inner-to-inner.auto-group-shared-barrier.comp
│       │   ├── inner-to-outer.auto-group-shared-barrier.comp
│       │   ├── outer-to-inner.auto-group-shared-barrier.comp
│       │   ├── single-block-loop.auto-group-shared-barrier.comp
│       │   └── single-block.auto-group-shared-barrier.comp
│       ├── control-flow/
│       │   ├── branch-return-2.comp
│       │   ├── branch-return.comp
│       │   ├── branch.comp
│       │   ├── conditional-break-into-if-else-if-ladder-2.comp
│       │   ├── conditional-break-into-if-else-if-ladder.comp
│       │   ├── dual-inner-loop-early-return.comp
│       │   ├── if-else-if-into-continue.comp
│       │   ├── inner-loop-early-return.comp
│       │   ├── interleaved-unrolled-loop-breaks.comp
│       │   ├── loop-break-2.comp
│       │   ├── loop-break.comp
│       │   ├── loop-continue-2.comp
│       │   ├── loop-continue-3.comp
│       │   ├── loop-continue.comp
│       │   ├── loop-inside-infinite-loop-2.frag
│       │   ├── loop-inside-infinite-loop.frag
│       │   ├── loop-return.comp
│       │   ├── loop.comp
│       │   ├── nested-loop-break-2.comp
│       │   ├── nested-loop-break.comp
│       │   ├── nested-loop.comp
│       │   ├── selection-merge-split-post-domination.frag
│       │   ├── switch-continue.frag
│       │   ├── switch-merge-into-other-merge.comp
│       │   ├── switch-shared-header-with-loop.comp
│       │   └── wave-size-dependent-loop-unroll.comp
│       ├── descriptor_qa/
│       │   ├── acceleration-structure.bindless.descriptor-qa.rgen
│       │   ├── acceleration-structure.bindless.descriptor-qa.sm66.rgen
│       │   ├── acceleration-structure.bindless.ssbo-rtas.local-root-signature.descriptor-qa.rgen
│       │   ├── descriptor_qa.bindless.cbv-as-ssbo.descriptor-qa.comp
│       │   ├── descriptor_qa.bindless.descriptor-qa.comp
│       │   ├── descriptor_qa.bindless.descriptor-qa.sm66.comp
│       │   ├── descriptor_qa.bindless.ssbo.descriptor-qa.comp
│       │   ├── early-2.bindless.descriptor-qa.frag
│       │   ├── early-3.bindless.descriptor-qa.frag
│       │   ├── early-4.bindless.descriptor-qa.frag
│       │   ├── early-5.bindless.descriptor-qa.frag
│       │   ├── early-heap.descriptor-qa.sm66.frag
│       │   └── early.bindless.descriptor-qa.frag
│       ├── dxil-builtin/
│       │   ├── accept-hit-and-end-search-ignore-hit.rany
│       │   ├── acos.frag
│       │   ├── asin.frag
│       │   ├── atan.frag
│       │   ├── atomic-bin-op.bindless.root-constant.frag
│       │   ├── atomic-bin-op.frag
│       │   ├── atomic-bin-op.root-descriptor.comp
│       │   ├── atomic-bin-op.ssbo.frag
│       │   ├── atomic-compare-exchange.frag
│       │   ├── atomic-compare-exchange.root-descriptor.comp
│       │   ├── atomic-compare-exchange.ssbo.frag
│       │   ├── attributes.denorm-ftz.comp
│       │   ├── attributes.denorm-preserve.comp
│       │   ├── barrier.comp
│       │   ├── barycentrics-2.frag
│       │   ├── barycentrics.frag
│       │   ├── bfrev.frag
│       │   ├── bitcount-bitrev-sizes.ssbo.comp
│       │   ├── buffer-load-feedback.frag
│       │   ├── buffer-load-signed-feedback.frag
│       │   ├── buffer-load-signed.frag
│       │   ├── buffer-load.frag
│       │   ├── buffer-load.ssbo.frag
│       │   ├── buffer-store-signed.frag
│       │   ├── buffer-store.frag
│       │   ├── buffer-store.ssbo.frag
│       │   ├── buffer-update-counter.frag
│       │   ├── calculate-lod.frag
│       │   ├── call-shader.rgen
│       │   ├── clip.demote-to-helper.frag
│       │   ├── clip.frag
│       │   ├── compute-shader-derivatives-cube-array.noderivs.sm66.ssbo.comp
│       │   ├── compute-shader-derivatives-cube.noderivs.sm66.ssbo.comp
│       │   ├── compute-shader-derivatives-single-thread.sm66.ssbo.comp
│       │   ├── compute-shader-derivatives.noderivs.sm66.ssbo.comp
│       │   ├── cos.frag
│       │   ├── countbits.frag
│       │   ├── coverage.frag
│       │   ├── derivative.frag
│       │   ├── derivative.sm60.frag
│       │   ├── derivative.sm60.native-fp16.frag
│       │   ├── derivatives.sm66.comp
│       │   ├── discard.demote-to-helper.frag
│       │   ├── discard.frag
│       │   ├── dispatch-rays-dimensions.rgen
│       │   ├── dispatch-rays-index.rgen
│       │   ├── dot2.frag
│       │   ├── dot3.frag
│       │   ├── dot4.frag
│       │   ├── eval-centroid.frag
│       │   ├── eval-sample-index.frag
│       │   ├── eval-snapped.frag
│       │   ├── exp.frag
│       │   ├── f16-to-f32.frag
│       │   ├── f32-to-f16.frag
│       │   ├── fabs.frag
│       │   ├── firstbithi-16.sm62.frag
│       │   ├── firstbithi-64.frag
│       │   ├── firstbithi.frag
│       │   ├── firstbitlo-16.sm62.frag
│       │   ├── firstbitlo-64.frag
│       │   ├── firstbitlo.frag
│       │   ├── firstbitshi-16.sm62.frag
│       │   ├── firstbitshi-64.frag
│       │   ├── firstbitshi.frag
│       │   ├── flattened_thread_id_in_group.comp
│       │   ├── fma.frag
│       │   ├── fmad-precise.frag
│       │   ├── fmad.frag
│       │   ├── fmax.frag
│       │   ├── fmin.frag
│       │   ├── frc.frag
│       │   ├── get-dimensions-w-only.frag
│       │   ├── get-dimensions-xyz-only.frag
│       │   ├── get-dimensions.bindless.root-constant.frag
│       │   ├── get-dimensions.bindless.root-constant.ssbo.frag
│       │   ├── get-dimensions.frag
│       │   ├── get-dimensions.ssbo.frag
│       │   ├── group_id.comp
│       │   ├── hcos.frag
│       │   ├── hsin.frag
│       │   ├── htan.frag
│       │   ├── imad.frag
│       │   ├── imax.frag
│       │   ├── imin.frag
│       │   ├── instance-id.vert
│       │   ├── is-helper-lane-2.demote-to-helper.sm66.frag
│       │   ├── is-helper-lane-2.sm66.frag
│       │   ├── is-helper-lane.demote-to-helper.sm66.frag
│       │   ├── is-helper-lane.sm66.frag
│       │   ├── isfinite.frag
│       │   ├── isinf.frag
│       │   ├── isnan.frag
│       │   ├── log.frag
│       │   ├── make-double.frag
│       │   ├── msaa-uav.sm67.comp
│       │   ├── msad.comp
│       │   ├── object-ray-direction.rany
│       │   ├── object-ray-origin.rany
│       │   ├── object-to-world-3x4.rany
│       │   ├── object-to-world-4x3.rany
│       │   ├── pack-unpack.ssbo.sm66.comp
│       │   ├── quad-all-any.sm67.comp
│       │   ├── quad-all-any.sm67.quad-maximal-reconvergence.noglsl.comp
│       │   ├── quad-read-at-2d.comp
│       │   ├── quad-read-at-2d.sm66.comp
│       │   ├── quad-read-at.comp
│       │   ├── quad-read-at.frag
│       │   ├── quad-swap.comp
│       │   ├── quad-swap.frag
│       │   ├── raw-gather-offset-sparse.sm67.ssbo.comp
│       │   ├── raw-gather-offset.sm67.ssbo.comp
│       │   ├── raw-gather-sparse.sm67.ssbo.comp
│       │   ├── raw-gather.sm67.ssbo.comp
│       │   ├── ray-query-phi-multi.invalid.sm66.comp
│       │   ├── ray-query-phi-simple.sm66.comp
│       │   ├── ray-query-select-multi.invalid.sm66.comp
│       │   ├── ray-query-select-simple.sm66.comp
│       │   ├── ray-query-store-multi.invalid.sm66.comp
│       │   ├── ray-query-store-simple.sm66.comp
│       │   ├── ray-query.comp
│       │   ├── ray-t-current.rany
│       │   ├── ray-t-min.rany
│       │   ├── render-target-sample-count.frag
│       │   ├── render-target-sample-position.frag
│       │   ├── report-hit.rint
│       │   ├── round-ne.frag
│       │   ├── round-ni.frag
│       │   ├── round-pi.frag
│       │   ├── round-z.frag
│       │   ├── rsqrt.frag
│       │   ├── rt-geometry-index.rany
│       │   ├── rt-hit-kind.rany
│       │   ├── rt-instance-id.rany
│       │   ├── rt-instance-index.rany
│       │   ├── rt-primitive-index.rany
│       │   ├── rt-ray-flags.rany
│       │   ├── sample-bias-feedback.frag
│       │   ├── sample-bias-offset.frag
│       │   ├── sample-bias.frag
│       │   ├── sample-cmp-bias-feedback.frag
│       │   ├── sample-cmp-bias-offset.frag
│       │   ├── sample-cmp-bias.frag
│       │   ├── sample-cmp-feedback.frag
│       │   ├── sample-cmp-grad-offset-feedback.frag
│       │   ├── sample-cmp-grad-offset.frag
│       │   ├── sample-cmp-grad.frag
│       │   ├── sample-cmp-level.sm67.noglsl.frag
│       │   ├── sample-cmp-levelzero.frag
│       │   ├── sample-cmp-offset-levelzero-feedback.frag
│       │   ├── sample-cmp-offset-levelzero.frag
│       │   ├── sample-cmp-offset.frag
│       │   ├── sample-cmp.frag
│       │   ├── sample-grad-offset-dynamic.noglsl.invalid.sm67.frag
│       │   ├── sample-grad-offset-feedback.frag
│       │   ├── sample-grad-offset.frag
│       │   ├── sample-grad.frag
│       │   ├── sample-id.frag
│       │   ├── sample-level-offset-feedback.frag
│       │   ├── sample-level-offset.frag
│       │   ├── sample-level.frag
│       │   ├── sample-offset-dynamic.noglsl.invalid.sm67.frag
│       │   ├── sample-offset.frag
│       │   ├── sample.frag
│       │   ├── saturate.frag
│       │   ├── sin.frag
│       │   ├── sm64-packed-arithmetic.ssbo.comp
│       │   ├── sm64-packed-arithmetic.ssbo.i8dot.noglsl.comp
│       │   ├── sm64-packed-arithmetic.ssbo.mixed-float-dot-product.noglsl.comp
│       │   ├── split-double.frag
│       │   ├── sqrt.frag
│       │   ├── tan.frag
│       │   ├── texture-gather-4offset.frag
│       │   ├── texture-gather-cmp-offset-feedback.frag
│       │   ├── texture-gather-cmp-offset.frag
│       │   ├── texture-gather-cmp.frag
│       │   ├── texture-gather-offset.frag
│       │   ├── texture-gather-signed-feedback.frag
│       │   ├── texture-gather-signed.frag
│       │   ├── texture-gather.frag
│       │   ├── texture-load-feedback.frag
│       │   ├── texture-load-offset-dynamic.sm67.frag
│       │   ├── texture-load-offset.frag
│       │   ├── texture-load-signed.frag
│       │   ├── texture-load.frag
│       │   ├── texture-store-signed.frag
│       │   ├── texture-store.frag
│       │   ├── texture2dms-sample-position.frag
│       │   ├── thread_id.comp
│       │   ├── thread_id_in_group.comp
│       │   ├── trace-ray-flags-2.rgen
│       │   ├── trace-ray-flags.rgen
│       │   ├── trace-ray.rgen
│       │   ├── umad.frag
│       │   ├── umax.frag
│       │   ├── umin.frag
│       │   ├── vertex-id.vert
│       │   ├── wave-active-all-true.comp
│       │   ├── wave-active-all-true.frag
│       │   ├── wave-active-any-true.comp
│       │   ├── wave-active-any-true.frag
│       │   ├── wave-active-ballot-discard.demote-to-helper.frag
│       │   ├── wave-active-ballot-discard.frag
│       │   ├── wave-active-ballot.comp
│       │   ├── wave-active-ballot.demote-to-helper.frag
│       │   ├── wave-active-ballot.frag
│       │   ├── wave-active-count-bits.comp
│       │   ├── wave-active-count-bits.frag
│       │   ├── wave-all-equal.comp
│       │   ├── wave-all-equal.frag
│       │   ├── wave-get-lane-count.comp
│       │   ├── wave-get-lane-index.comp
│       │   ├── wave-is-first-lane.comp
│       │   ├── wave-is-first-lane.frag
│       │   ├── wave-match.comp
│       │   ├── wave-match.frag
│       │   ├── wave-match.partitioned.noglsl.comp
│       │   ├── wave-match.partitioned.noglsl.frag
│       │   ├── wave-multi-prefix-count-bits.comp
│       │   ├── wave-multi-prefix-count-bits.frag
│       │   ├── wave-multi-prefix-op.comp
│       │   ├── wave-multi-prefix-op.frag
│       │   ├── wave-multi-prefix-op.partitioned.noglsl.comp
│       │   ├── wave-multi-prefix-op.partitioned.noglsl.frag
│       │   ├── wave-prefix.comp
│       │   ├── wave-prefix.frag
│       │   ├── wave-read-lane-at-optimizations.comp
│       │   ├── wave-read-lane-at.comp
│       │   ├── wave-read-lane-first.comp
│       │   ├── wave-read-lane-first.frag
│       │   ├── wave-reduce-helpers.sm67.frag
│       │   ├── wave-reduce-helpers.sm67.quad-maximal-reconvergence.frag
│       │   ├── wave-reduce.comp
│       │   ├── wave-reduce.frag
│       │   ├── wave-size.sm66.comp
│       │   ├── world-ray-direction.rany
│       │   ├── world-ray-origin.rany
│       │   ├── world-to-object-3x4.rany
│       │   └── world-to-object-4x3.rany
│       ├── fp16/
│       │   ├── saturate.frag
│       │   ├── saturate.sm60.frag
│       │   └── saturate.sm60.native-fp16.frag
│       ├── heap-robustness/
│       │   ├── misc.bindless.heap-raw-va-cbv.sm66.ssbo.comp
│       │   ├── misc.bindless.heap-robustness.heap-robustness-cbv.sm66.ssbo.comp
│       │   ├── misc.bindless.heap-robustness.sm66.ssbo.comp
│       │   ├── misc.bindless.sm66.ssbo.comp
│       │   └── misc.heap-robustness.bindless.heap-robustness-cbv.heap-raw-va-cbv.sm66.ssbo.comp
│       ├── instrumentation/
│       │   ├── atomics-raw.bindless.bda-instrumentation.ssbo.comp
│       │   ├── atomics-raw.root-descriptor.bda-instrumentation.ssbo.comp
│       │   ├── atomics-structured-counter.bindless.bda-instrumentation.ssbo.comp
│       │   ├── atomics-structured.bindless.bda-instrumentation.ssbo.comp
│       │   ├── atomics-structured.root-descriptor.bda-instrumentation.ssbo.comp
│       │   ├── atomics-typed.bindless.bda-instrumentation.ssbo.comp
│       │   ├── cbv.bindless.bda-instrumentation.comp
│       │   ├── cbv.root-descriptor.bda-instrumentation.comp
│       │   ├── raw.bindless.bda-instrumentation.ssbo.comp
│       │   ├── raw.root-descriptor.bda-instrumentation.ssbo.comp
│       │   ├── structured.bindless.bda-instrumentation.comp
│       │   ├── structured.bindless.bda-instrumentation.ssbo.comp
│       │   ├── structured.root-descriptor.bda-instrumentation.ssbo.comp
│       │   └── typed.bindless.bda-instrumentation.comp
│       ├── llvm-builtin/
│       │   ├── alloca-robustness-cases.extended-robustness.vert
│       │   ├── alloca.frag
│       │   ├── atomic-bin-op.comp
│       │   ├── atomic-compare-exchange.comp
│       │   ├── atomic-compare-exchange.sm66.ssbo.comp
│       │   ├── bool-to-fp.frag
│       │   ├── constant-expression-cast.comp
│       │   ├── constant-expression-gep.comp
│       │   ├── fadd.frag
│       │   ├── fast-mul-div-pair.comp
│       │   ├── fcmp_eq.frag
│       │   ├── fcmp_ge.frag
│       │   ├── fcmp_gt.frag
│       │   ├── fcmp_le.frag
│       │   ├── fcmp_lt.frag
│       │   ├── fcmp_ne.frag
│       │   ├── fdiv.frag
│       │   ├── fmul.frag
│       │   ├── frem.frag
│       │   ├── fsub.frag
│       │   ├── glitched-integer-width.comp
│       │   ├── groupshared.comp
│       │   ├── icmp_eq.frag
│       │   ├── icmp_ne.frag
│       │   ├── icmp_sge.frag
│       │   ├── icmp_sgt.frag
│       │   ├── icmp_sle.frag
│       │   ├── icmp_slt.frag
│       │   ├── icmp_uge.frag
│       │   ├── icmp_ugt.frag
│       │   ├── icmp_ule.frag
│       │   ├── icmp_ult.frag
│       │   ├── logical-and.frag
│       │   ├── logical-equal.frag
│       │   ├── logical-not-equal.frag
│       │   ├── logical-or.frag
│       │   ├── lut.frag
│       │   ├── min16-phi.sm60.comp
│       │   ├── precise_math.frag
│       │   └── zext-bool.frag
│       ├── memory-model/
│       │   ├── uav-coherent-promotion.bindless.ssbo.comp
│       │   ├── uav-coherent-promotion.root-descriptor.ssbo.comp
│       │   ├── uav-coherent-promotion.sm66.bindless.ssbo.comp
│       │   ├── uav-coherent-promotion.sm66.ssbo.comp
│       │   ├── uav-coherent-promotion.ssbo.comp
│       │   ├── uav-coherent.root-descriptor.ssbo.comp
│       │   ├── uav-coherent.sm66.ssbo.comp
│       │   └── uav-coherent.ssbo.comp
│       ├── nvapi/
│       │   ├── bringup.nvapi.ssbo.rgen
│       │   ├── get-special-global-timer.nvapi.ssbo.rgen
│       │   ├── hit-object.local-root-signature.noglsl.nvapi.ssbo.rgen
│       │   ├── ray-query-cluster-id.nvapi.comp
│       │   ├── rt-cluster-id.nvapi.rany
│       │   └── shuffle.nvapi.ssbo.comp
│       ├── opts/
│       │   ├── fp16-fp32-fp16-1.ssbo.comp
│       │   ├── sabs.frag
│       │   ├── sneg.frag
│       │   ├── wave-read-lane-first-heap.sm66.comp
│       │   ├── wave-read-lane-first.bindless.local-root-signature.rmiss
│       │   ├── wave-read-lane-first.comp
│       │   ├── wave-read-lane-first.no-legacy-cbuf-layout.comp
│       │   ├── wave-read-lane-first.no-legacy-cbuf-layout.sm60.comp
│       │   ├── wave-read-lane-first.sm60.comp
│       │   ├── wave-read-lane-first.sm66.comp
│       │   ├── wave-read-lane-first.ssbo.comp
│       │   ├── wave-read-lane-first.ssbo.rgen
│       │   ├── wave-read-lane-first.ssbo.sm60.comp
│       │   ├── wave-read-lane-first.ssbo.sm66.comp
│       │   └── wave-read-lane-first.ssbo.sm66.rgen
│       ├── raw-access/
│       │   ├── bab-double1.raw-access-chains.noglsl.ssbo.comp
│       │   ├── bab-double2.raw-access-chains.noglsl.ssbo.comp
│       │   ├── bab-double3.raw-access-chains.noglsl.ssbo.comp
│       │   ├── bab-double4.raw-access-chains.noglsl.ssbo.comp
│       │   ├── bab-float1.raw-access-chains.noglsl.ssbo.comp
│       │   ├── bab-float2.raw-access-chains.noglsl.ssbo.comp
│       │   ├── bab-float3.raw-access-chains.noglsl.ssbo.comp
│       │   ├── bab-float4.raw-access-chains.noglsl.ssbo.comp
│       │   ├── bab-float4x4.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-float1.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-float2.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-float3.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-float4.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-float4x4.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-half1.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-min16float1.raw-access-chains.noglsl.ssbo.sm60.comp
│       │   ├── structured-min16float1.raw-access-chains.noglsl.ssbo.sm60.native-fp16.comp
│       │   ├── structured-min16uint1.raw-access-chains.noglsl.ssbo.sm60.comp
│       │   ├── structured-min16uint1.raw-access-chains.noglsl.ssbo.sm60.native-fp16.comp
│       │   ├── structured-uint1.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-uint2.raw-access-chains.noglsl.ssbo.comp
│       │   ├── structured-uint3.raw-access-chains.noglsl.ssbo.comp
│       │   └── structured-uint4.raw-access-chains.noglsl.ssbo.comp
│       ├── resources/
│       │   ├── acceleration-structure.bindless.rgen
│       │   ├── acceleration-structure.bindless.ssbo-rtas.local-root-signature.raw-va-stride-offset.rgen
│       │   ├── acceleration-structure.bindless.ssbo-rtas.local-root-signature.rgen
│       │   ├── acceleration-structure.local-root-signature.root-descriptor.rgen
│       │   ├── basic.input-attachment.frag
│       │   ├── buffer-16bit.ssbo.bindless.comp
│       │   ├── buffer-16bit.ssbo.bindless.ssbo-align.comp
│       │   ├── buffer-16bit.ssbo.comp
│       │   ├── buffer-64bit.ssbo.bindless.ssbo-align.comp
│       │   ├── buffer-64bit.ssbo.comp
│       │   ├── buffer-alignment-fixup.bindless.root-constant.offset-layout.typed-buffer-offset.comp
│       │   ├── buffer-alignment-fixup.ssbo.ssbo-align.bindless.root-constant.comp
│       │   ├── buffer-alignment-fixup.ssbo.ssbo-align.bindless.root-constant.offset-layout.typed-buffer-offset.comp
│       │   ├── cbuf.root-constant.min16float.sm60.frag
│       │   ├── cbuf.root-constant.min16float.sm60.native-fp16.frag
│       │   ├── cbuf.root-constant.min16int.sm60.frag
│       │   ├── cbuf.root-constant.min16int.sm60.native-fp16.frag
│       │   ├── cbv-array-nonuniform.frag
│       │   ├── cbv-array.frag
│       │   ├── cbv-dynamic.no-legacy-cbuf-layout.local-root-signature.rmiss
│       │   ├── cbv-indexing.frag
│       │   ├── cbv-indexing.sm66.frag
│       │   ├── cbv-legacy-fp16-fp64.frag
│       │   ├── cbv-legacy-fp16-fp64.root-descriptor.frag
│       │   ├── cbv-legacy-fp16-fp64.root-descriptor.sm60.frag
│       │   ├── cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag
│       │   ├── cbv-legacy-fp16-fp64.sm60.frag
│       │   ├── cbv-legacy-fp16-fp64.sm60.native-fp16.frag
│       │   ├── cbv.bindless.root-constant.cbv-as-ssbo.frag
│       │   ├── cbv.bindless.root-constant.frag
│       │   ├── cbv.frag
│       │   ├── cbv.no-legacy-cbuf-layout.bindless.frag
│       │   ├── cbv.no-legacy-cbuf-layout.index-divider.frag
│       │   ├── cbv.no-legacy-cbuf-layout.local-root-signature.rmiss
│       │   ├── cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag
│       │   ├── cbv.no-legacy-cbuf-layout.root-constant.frag
│       │   ├── cbv.root-constant.frag
│       │   ├── cbv.root-descriptor.no-legacy-cbuf-layout.frag
│       │   ├── combined-image-sampler-reuse.frag
│       │   ├── dynamic-root-constant.root-constant.bindless.root-descriptor.comp
│       │   ├── min16-alloca-groupshared.sm60.comp
│       │   ├── min16float-ssbo-dxr.ssbo.rgen
│       │   ├── raw-buffer-addressing.comp
│       │   ├── raw-buffer-addressing.ssbo.comp
│       │   ├── root-bda.root-descriptor.comp
│       │   ├── root-bda.root-descriptor.sm60.comp
│       │   ├── root-constant-with-bda.root-descriptor.root-constant.comp
│       │   ├── rt-resources.bindless.local-root-signature.rmiss
│       │   ├── rt-resources.bindless.rmiss
│       │   ├── rt-resources.rmiss
│       │   ├── sampler-array.frag
│       │   ├── sampler-indexing.frag
│       │   ├── sampler-indexing.sm66.frag
│       │   ├── sampler.bindless.root-constant.frag
│       │   ├── sm66/
│       │   │   ├── atomics-64bit-groupshared.ssbo.sm66.comp
│       │   │   ├── atomics-64bit.root-descriptor.sm66.comp
│       │   │   ├── atomics-64bit.ssbo.sm66.comp
│       │   │   ├── atomics-component-alias.sm66.comp
│       │   │   ├── atomics-typed-64bit-heap.sm66.comp
│       │   │   ├── atomics-typed-64bit.bindless.sm66.comp
│       │   │   ├── atomics-typed-64bit.sm66.comp
│       │   │   ├── binding-range-selection.bindless.sm66.comp
│       │   │   ├── binding-range-selection.sm66.comp
│       │   │   ├── buffer-64bit-double.ssbo.sm66.comp
│       │   │   ├── buffer-64bit.ssbo.sm66.comp
│       │   │   ├── buffer-64bit.ssbo.ssbo-align.sm66.comp
│       │   │   ├── cbuffer-heap.sm66.frag
│       │   │   ├── cbv.no-legacy-cbuf-layout.bindless.sm66.frag
│       │   │   ├── cbv.no-legacy-cbuf-layout.sm66.frag
│       │   │   ├── raw-buffer-heap.sm66.frag
│       │   │   ├── raw-buffer-heap.ssbo.sm66.frag
│       │   │   ├── raw-buffer-heap.typed-buffer-offset.sm66.frag
│       │   │   ├── raw-buffers-binding.ssbo.bindless.sm66.frag
│       │   │   ├── raw-buffers-binding.ssbo.bindless.ssbo-align.sm66.frag
│       │   │   ├── raw-buffers-binding.ssbo.sm66.frag
│       │   │   ├── raygen-heap.sm66.rgen
│       │   │   ├── raygen-heap.ssbo-rtas.raw-va-stride-offset.sm66.rgen
│       │   │   ├── raygen-heap.ssbo-rtas.sm66.rgen
│       │   │   ├── raygen.sm66.rgen
│       │   │   ├── raygen.ssbo-rtas.bindless.raw-va-stride-offset.sm66.rgen
│       │   │   ├── raygen.ssbo-rtas.bindless.sm66.rgen
│       │   │   ├── rw-typed-binding.sm66.frag
│       │   │   ├── rw-typed-heap.sm66.frag
│       │   │   ├── sampled-types-binding.sm66.frag
│       │   │   ├── sampled-types.sm66.frag
│       │   │   ├── sampler-binding.sm66.frag
│       │   │   ├── sampler-heap.sm66.frag
│       │   │   ├── structured-16bit-heap.ssbo.sm66.frag
│       │   │   ├── structured-16bit-heap.ssbo.ssbo-align.sm66.frag
│       │   │   ├── structured-buffer-heap.sm66.frag
│       │   │   ├── structured-buffer-heap.ssbo.sm66.frag
│       │   │   ├── structured-buffer-heap.ssbo.ssbo-align.sm66.frag
│       │   │   └── structured-buffer-heap.typed-buffer-offset.sm66.frag
│       │   ├── srv-array-raw-buffer-nonuniform.frag
│       │   ├── srv-array-raw-buffer.frag
│       │   ├── srv-array-structured-buffer-nonuniform.frag
│       │   ├── srv-array-structured-buffer.frag
│       │   ├── srv-array-texture-nonuniform.frag
│       │   ├── srv-array-texture.frag
│       │   ├── srv-array-typed-buffer-nonuniform.frag
│       │   ├── srv-array-typed-buffer.frag
│       │   ├── srv-indexing.frag
│       │   ├── srv-indexing.sm66.frag
│       │   ├── srv-raw-buffer.bindless.root-constant.frag
│       │   ├── srv-raw-buffer.bindless.root-constant.ssbo.frag
│       │   ├── srv-raw-buffer.ssbo.frag
│       │   ├── srv-structured-buffer.bindless.root-constant.frag
│       │   ├── srv-structured-buffer.bindless.root-constant.ssbo.frag
│       │   ├── srv-structured-buffer.ssbo.frag
│       │   ├── srv-texture.bindless.root-constant.frag
│       │   ├── srv-texture.bindless.root-constant.inline-ubo.frag
│       │   ├── srv-typed-buffer.bindless.root-constant.frag
│       │   ├── srv-uav-raw.typed-buffer-offset.comp
│       │   ├── srv-uav.typed-buffer-offset.comp
│       │   ├── ssbo-minprecision.sm60.native-fp16.frag
│       │   ├── ssbo-minprecision.sm60.ssbo.frag
│       │   ├── ssbo-minprecision.sm60.ssbo.native-fp16.frag
│       │   ├── ssbo-minprecision.sm60.ssbo.native-fp16.root-descriptor.frag
│       │   ├── ssbo-minprecision.sm60.ssbo.root-descriptor.frag
│       │   ├── subobject-parsing.rgen
│       │   ├── typed-resources-16bit-sparse.frag
│       │   ├── typed-resources-16bit.bindless.frag
│       │   ├── typed-resources-16bit.frag
│       │   ├── typed-resources-16bit.sm60.bindless.frag
│       │   ├── typed-resources-16bit.sm60.frag
│       │   ├── typed-resources-16bit.sm60.native-fp16.bindless.frag
│       │   ├── typed-resources-16bit.sm60.native-fp16.frag
│       │   ├── uav-array-raw-buffer-nonuniform.frag
│       │   ├── uav-array-raw-buffer.frag
│       │   ├── uav-array-structured-buffer-nonuniform.frag
│       │   ├── uav-array-structured-buffer-nonuniform.ssbo.bindless.root-constant.frag
│       │   ├── uav-array-structured-buffer-nonuniform.ssbo.frag
│       │   ├── uav-array-structured-buffer.frag
│       │   ├── uav-array-texture-nonuniform.frag
│       │   ├── uav-array-texture.frag
│       │   ├── uav-array-typed-buffer-nonuniform.frag
│       │   ├── uav-array-typed-buffer.frag
│       │   ├── uav-counter-array.ssbo.frag
│       │   ├── uav-counter-array.ssbo.sm66.frag
│       │   ├── uav-counter-array.ssbo.sm66.uav-counter-ssbo.frag
│       │   ├── uav-counter-array.ssbo.uav-counter-ssbo.frag
│       │   ├── uav-counter-heap.sm66.bindless.ssbo.frag
│       │   ├── uav-counter-heap.sm66.uav-counter-ssbo.bindless.ssbo.frag
│       │   ├── uav-counter-heap.sm66.uav-counter-texel-buffer.bindless.ssbo.frag
│       │   ├── uav-counter.bindless.nobda.root-constant.comp
│       │   ├── uav-counter.bindless.nobda.root-constant.raw-va-stride-offset.comp
│       │   ├── uav-counter.bindless.root-constant.comp
│       │   ├── uav-counter.bindless.root-constant.raw-va-stride-offset.comp
│       │   ├── uav-counter.bindless.root-constant.raw-va-stride-offset.heap-raw-va-cbv.comp
│       │   ├── uav-counter.bindless.root-constant.uav-counter-ssbo.comp
│       │   ├── uav-counter.ssbo.comp
│       │   ├── uav-counter.ssbo.raw-va-stride-offset.comp
│       │   ├── uav-counter.ssbo.uav-counter-ssbo.comp
│       │   ├── uav-indexing.frag
│       │   ├── uav-indexing.sm66.frag
│       │   ├── uav-raw-buffer.bindless.root-constant.frag
│       │   ├── uav-raw-buffer.ssbo.frag
│       │   ├── uav-structured-buffer.bindless.root-constant.frag
│       │   ├── uav-typed-buffer.bindless.root-constant.frag
│       │   └── uav-typed.typed-uav-without-format.comp
│       ├── rov/
│       │   ├── rov-bab.bindless.frag
│       │   ├── rov-bab.frag
│       │   ├── rov-bab.ssbo.bindless.frag
│       │   ├── rov-bab.ssbo.frag
│       │   ├── rov-bab.ssbo.root-descriptor.frag
│       │   ├── rov-branch-early-return.frag
│       │   ├── rov-branch.frag
│       │   ├── rov-buffer.frag
│       │   ├── rov-inloop-2.frag
│       │   ├── rov-inloop.frag
│       │   ├── rov-per-sample.sm66.frag
│       │   ├── rov-postloop.frag
│       │   ├── rov-structured.bindless.frag
│       │   ├── rov-structured.frag
│       │   ├── rov-structured.ssbo.bindless.frag
│       │   ├── rov-structured.ssbo.frag
│       │   ├── rov-structured.ssbo.root-descriptor.frag
│       │   ├── rov-tex1d.bindless.frag
│       │   ├── rov-tex1d.frag
│       │   ├── rov-tex1darray.bindless.frag
│       │   ├── rov-tex1darray.frag
│       │   ├── rov-tex2d.bindless.frag
│       │   ├── rov-tex2d.frag
│       │   ├── rov-tex2darray.bindless.frag
│       │   ├── rov-tex2darray.frag
│       │   ├── rov-tex3d.bindless.frag
│       │   ├── rov-tex3d.frag
│       │   ├── rov-undef.frag
│       │   └── rov.sm66.frag
│       ├── sampler-feedback/
│       │   ├── sampler-feedback.frag
│       │   └── sampler-feedback.sm66.frag
│       ├── semantics/
│       │   ├── clip-cull-distance.vert
│       │   ├── clip-cull.frag
│       │   ├── clip-distance-cols.frag
│       │   ├── clip-distance-cols.vert
│       │   ├── clip-distance-flatten.frag
│       │   ├── clip-distance-flatten.vert
│       │   ├── clip-distance-rows.frag
│       │   ├── clip-distance-rows.vert
│       │   ├── clip-distance-single.vert
│       │   ├── coverage.frag
│       │   ├── depth-greater-equal.frag
│       │   ├── depth-less-equal.frag
│       │   ├── depth.frag
│       │   ├── early-depth-stencil.frag
│       │   ├── inner-coverage.noglsl.frag
│       │   ├── is-front-face.frag
│       │   ├── position-short.frag
│       │   ├── position-short.vert
│       │   ├── position.frag
│       │   ├── primitive-id.frag
│       │   ├── primitive-id.geom
│       │   ├── render-target-array-index.frag
│       │   ├── render-target-array-index.geom
│       │   ├── sample-rate-pos.frag
│       │   ├── stencil-ref.frag
│       │   ├── sv-shading-rate.noglsl.frag
│       │   ├── sv-shading-rate.noglsl.vert
│       │   ├── view-id.frag
│       │   ├── view-id.vert
│       │   ├── viewport-array-index.frag
│       │   └── viewport-array-index.geom
│       ├── stages/
│       │   ├── boolean-io.vert
│       │   ├── callable-chain.rcall
│       │   ├── callable.rcall
│       │   ├── closesthit.rclosest
│       │   ├── domain-clip-cull.tese
│       │   ├── domain-patch-input-integer-io.tese
│       │   ├── domain.tese
│       │   ├── extra_output.dual-source-blending.frag
│       │   ├── extra_output_reordered.dual-source-blending.frag
│       │   ├── geometry-clip-cull.geom
│       │   ├── geometry-input-line.geom
│       │   ├── geometry-input-lineadj.geom
│       │   ├── geometry-input-point.geom
│       │   ├── geometry-input-triangle.geom
│       │   ├── geometry-input-triangleadj.geom
│       │   ├── geometry-instancing.geom
│       │   ├── geometry-output-line.geom
│       │   ├── geometry-output-point.geom
│       │   ├── geometry-streams.geom
│       │   ├── hull-arrays.tesc
│       │   ├── hull-clip-cull.tesc
│       │   ├── hull-patch-output-integer-io.tesc
│       │   ├── hull-single-cp.tesc
│       │   ├── hull.tesc
│       │   ├── mesh-basic-line.mesh
│       │   ├── mesh-basic.mesh
│       │   ├── mesh-clip-cull.mesh
│       │   ├── raygen-complex-storage-class.rgen
│       │   ├── raygen-skip-inactive-resources.rgen
│       │   ├── raygen.rgen
│       │   ├── raymiss-chain.rmiss
│       │   ├── raymiss.rmiss
│       │   ├── simple.dual-source-blending.frag
│       │   ├── simple.invariant.vert
│       │   ├── stage-input-output.16bit-io.frag
│       │   ├── stage-input-output.frag
│       │   ├── stream-out.stream-out.vert
│       │   ├── swizzle.rt-swizzle.frag
│       │   ├── task-basic.task
│       │   ├── vertex-array-input.vert
│       │   ├── vertex-array-output.vert
│       │   └── vertex-input-remapping.vert
│       ├── vectorization/
│       │   ├── copy-byte-address.ssbo.comp
│       │   ├── copy-composite-2.ssbo.comp
│       │   ├── copy-composite.ssbo.comp
│       │   ├── copy-composite.ssbo.ssbo-align.bindless.comp
│       │   ├── copy-double2.ssbo.comp
│       │   ├── copy-double2.ssbo.ssbo-align.bindless.comp
│       │   ├── copy-double3.ssbo.comp
│       │   ├── copy-double3.ssbo.ssbo-align.bindless.comp
│       │   ├── copy-double4.ssbo.comp
│       │   ├── copy-float2.ssbo.comp
│       │   ├── copy-float2.ssbo.ssbo-align.bindless.comp
│       │   ├── copy-float2x2.ssbo.comp
│       │   ├── copy-float3.ssbo.comp
│       │   ├── copy-float3.ssbo.ssbo-align.bindless.comp
│       │   ├── copy-float4x4.ssbo.comp
│       │   ├── copy-half2.ssbo.comp
│       │   ├── copy-half2.ssbo.ssbo-align.bindless.comp
│       │   ├── copy-half3.ssbo.comp
│       │   ├── copy-half3.ssbo.ssbo-align.bindless.comp
│       │   ├── copy-half4.ssbo.comp
│       │   └── copy-half4.ssbo.ssbo-align.bindless.comp
│       ├── view-instancing/
│       │   ├── geom/
│       │   │   ├── basic.view-instancing.last-pre-raster.geom
│       │   │   ├── basic.view-instancing.last-pre-raster.view-instance-mask.geom
│       │   │   ├── basic.view-instancing.view-instancing-multiview.last-pre-raster.geom
│       │   │   ├── basic.view-instancing.view-instancing-multiview.view-instancing-viewport-offset.last-pre-raster.geom
│       │   │   └── basic.view-instancing.view-instancing-viewport-offset.last-pre-raster.geom
│       │   ├── mesh/
│       │   │   ├── basic-export-viewport-layer.view-instancing.last-pre-raster.mesh
│       │   │   ├── basic-export-viewport-layer.view-instancing.last-pre-raster.view-instancing-viewport-offset.mesh
│       │   │   ├── basic-few-thread.view-instancing.last-pre-raster.mesh
│       │   │   ├── basic-many-thread.view-instancing.last-pre-raster.mesh
│       │   │   ├── basic.view-instancing.last-pre-raster.mesh
│       │   │   ├── basic.view-instancing.last-pre-raster.view-instance-mask.mesh
│       │   │   ├── basic.view-instancing.last-pre-raster.view-instancing-viewport-offset.mesh
│       │   │   └── basic.view-instancing.mesh
│       │   ├── tesc/
│       │   │   ├── basic.view-instancing.tesc
│       │   │   └── basic.view-instancing.view-instancing-multiview.tesc
│       │   ├── tese/
│       │   │   ├── domain-export-layer-viewport.view-instancing.view-instancing-multiview.last-pre-raster.tese
│       │   │   ├── domain-export-layer-viewport.view-instancing.view-instancing-multiview.tese
│       │   │   ├── domain.view-instancing.last-pre-raster.tese
│       │   │   ├── domain.view-instancing.tese
│       │   │   ├── domain.view-instancing.view-instancing-multiview.last-pre-raster.tese
│       │   │   ├── domain.view-instancing.view-instancing-multiview.tese
│       │   │   ├── domain.view-instancing.view-instancing-viewport-offset.last-pre-raster.tese
│       │   │   └── domain.view-instancing.view-instancing-viewport-offset.tese
│       │   └── vert/
│       │       ├── basic.view-instancing.export-layer-viewport.last-pre-raster.vert
│       │       ├── basic.view-instancing.export-layer-viewport.vert
│       │       ├── basic.view-instancing.export-layer-viewport.view-instancing-multiview.vert
│       │       ├── basic.view-instancing.export-layer-viewport.view-instancing-viewport-offset.vert
│       │       ├── basic.view-instancing.export-layer.last-pre-raster.vert
│       │       ├── basic.view-instancing.export-layer.vert
│       │       ├── basic.view-instancing.export-layer.view-instancing-multiview.vert
│       │       ├── basic.view-instancing.export-layer.view-instancing-viewport-offset.vert
│       │       ├── basic.view-instancing.export-viewport.last-pre-raster.vert
│       │       ├── basic.view-instancing.export-viewport.vert
│       │       ├── basic.view-instancing.export-viewport.view-instancing-multiview.vert
│       │       ├── basic.view-instancing.export-viewport.view-instancing-viewport-offset.vert
│       │       ├── basic.view-instancing.last-pre-raster.vert
│       │       ├── basic.view-instancing.vert
│       │       ├── basic.view-instancing.view-instance-mask.last-pre-raster.vert
│       │       ├── basic.view-instancing.view-instance-mask.vert
│       │       ├── basic.view-instancing.view-instancing-multiview.vert
│       │       └── basic.view-instancing.view-instancing-viewport-offset.vert
│       └── vkmm/
│           ├── coopmat.sm66.ssbo.vkmm.comp
│           ├── cross_group_sharing.vkmm.node.inline-ubo.comp
│           ├── descriptor_qa.bindless.descriptor-qa.vkmm.comp
│           ├── groupshared.vkmm.comp
│           ├── hull.vkmm.tesc
│           ├── image-load-store.vkmm.comp
│           ├── image-load-store.vkmm.sm66.comp
│           ├── memory-model/
│           │   ├── uav-coherent-promotion.bindless.ssbo.vkmm.comp
│           │   ├── uav-coherent-promotion.root-descriptor.ssbo.vkmm.comp
│           │   ├── uav-coherent-promotion.sm66.bindless.ssbo.vkmm.comp
│           │   ├── uav-coherent-promotion.sm66.ssbo.vkmm.comp
│           │   ├── uav-coherent-promotion.ssbo.vkmm.comp
│           │   ├── uav-coherent.root-descriptor.ssbo.vkmm.comp
│           │   ├── uav-coherent.sm66.ssbo.vkmm.comp
│           │   └── uav-coherent.ssbo.vkmm.comp
│           ├── report-hit.vkmm.rint
│           ├── rov-structured.vkmm.frag
│           └── rov-tex2d.vkmm.frag
├── reference-dxbc/
│   ├── test_arithmetic_bool.asm
│   ├── test_arithmetic_bool.glsl
│   ├── test_arithmetic_fp16_compare.asm
│   ├── test_arithmetic_fp16_compare.glsl
│   ├── test_arithmetic_fp16_packing.asm
│   ├── test_arithmetic_fp16_packing.glsl
│   ├── test_arithmetic_fp16_packing_legacy.asm
│   ├── test_arithmetic_fp16_packing_legacy.glsl
│   ├── test_arithmetic_fp16_scalar.asm
│   ├── test_arithmetic_fp16_scalar.glsl
│   ├── test_arithmetic_fp16_vector.asm
│   ├── test_arithmetic_fp16_vector.glsl
│   ├── test_arithmetic_fp32.asm
│   ├── test_arithmetic_fp32.glsl
│   ├── test_arithmetic_fp32_compare.asm
│   ├── test_arithmetic_fp32_compare.glsl
│   ├── test_arithmetic_fp32_precise.asm
│   ├── test_arithmetic_fp32_precise.glsl
│   ├── test_arithmetic_fp32_special.asm
│   ├── test_arithmetic_fp32_special.glsl
│   ├── test_arithmetic_fp64.asm
│   ├── test_arithmetic_fp64.glsl
│   ├── test_arithmetic_fp64_compare.asm
│   ├── test_arithmetic_fp64_compare.glsl
│   ├── test_arithmetic_fp64_packing.asm
│   ├── test_arithmetic_fp64_packing.glsl
│   ├── test_arithmetic_int_extended.asm
│   ├── test_arithmetic_int_extended.glsl
│   ├── test_arithmetic_sint16_compare.asm
│   ├── test_arithmetic_sint16_compare.glsl
│   ├── test_arithmetic_sint16_scalar.asm
│   ├── test_arithmetic_sint16_scalar.glsl
│   ├── test_arithmetic_sint16_vector.asm
│   ├── test_arithmetic_sint16_vector.glsl
│   ├── test_arithmetic_sint32.asm
│   ├── test_arithmetic_sint32.glsl
│   ├── test_arithmetic_sint32_compare.asm
│   ├── test_arithmetic_sint32_compare.glsl
│   ├── test_arithmetic_uint16_compare.asm
│   ├── test_arithmetic_uint16_compare.glsl
│   ├── test_arithmetic_uint16_scalar.asm
│   ├── test_arithmetic_uint16_scalar.glsl
│   ├── test_arithmetic_uint16_vector.asm
│   ├── test_arithmetic_uint16_vector.glsl
│   ├── test_arithmetic_uint32.asm
│   ├── test_arithmetic_uint32.glsl
│   ├── test_arithmetic_uint32_compare.asm
│   ├── test_arithmetic_uint32_compare.glsl
│   ├── test_cfg_if.asm
│   ├── test_cfg_if.glsl
│   ├── test_cfg_if_else.asm
│   ├── test_cfg_if_else.glsl
│   ├── test_cfg_loop_infinite.asm
│   ├── test_cfg_loop_infinite.glsl
│   ├── test_cfg_loop_once.asm
│   ├── test_cfg_loop_once.glsl
│   ├── test_cfg_switch_complex.asm
│   ├── test_cfg_switch_complex.glsl
│   ├── test_cfg_switch_simple.asm
│   ├── test_cfg_switch_simple.glsl
│   ├── test_convert_f_to_f.asm
│   ├── test_convert_f_to_f.glsl
│   ├── test_convert_f_to_i.asm
│   ├── test_convert_f_to_i.glsl
│   ├── test_convert_i_to_f.asm
│   ├── test_convert_i_to_f.glsl
│   ├── test_convert_i_to_i.asm
│   ├── test_convert_i_to_i.glsl
│   ├── test_io_cs_builtins.asm
│   ├── test_io_cs_builtins.glsl
│   ├── test_io_ds_isoline.asm
│   ├── test_io_ds_isoline.glsl
│   ├── test_io_ds_quad.asm
│   ├── test_io_ds_quad.glsl
│   ├── test_io_ds_triangle.asm
│   ├── test_io_ds_triangle.glsl
│   ├── test_io_gs_basic_line.asm
│   ├── test_io_gs_basic_line.glsl
│   ├── test_io_gs_basic_line_adj.asm
│   ├── test_io_gs_basic_line_adj.glsl
│   ├── test_io_gs_basic_point.asm
│   ├── test_io_gs_basic_point.glsl
│   ├── test_io_gs_basic_triangle.asm
│   ├── test_io_gs_basic_triangle.glsl
│   ├── test_io_gs_basic_triangle_adj.asm
│   ├── test_io_gs_basic_triangle_adj.glsl
│   ├── test_io_gs_instanced.asm
│   ├── test_io_gs_instanced.glsl
│   ├── test_io_gs_multi_stream_xfb_raster_0.asm
│   ├── test_io_gs_multi_stream_xfb_raster_0.glsl
│   ├── test_io_gs_multi_stream_xfb_raster_1.asm
│   ├── test_io_gs_multi_stream_xfb_raster_1.glsl
│   ├── test_io_gs_xfb.asm
│   ├── test_io_gs_xfb.glsl
│   ├── test_io_hs_line.asm
│   ├── test_io_hs_line.glsl
│   ├── test_io_hs_point.asm
│   ├── test_io_hs_point.glsl
│   ├── test_io_hs_triangle_ccw.asm
│   ├── test_io_hs_triangle_ccw.glsl
│   ├── test_io_hs_triangle_cw.asm
│   ├── test_io_hs_triangle_cw.glsl
│   ├── test_io_ps_builtins.asm
│   ├── test_io_ps_builtins.glsl
│   ├── test_io_ps_export_depth.asm
│   ├── test_io_ps_export_depth.glsl
│   ├── test_io_ps_export_depth_greater.asm
│   ├── test_io_ps_export_depth_greater.glsl
│   ├── test_io_ps_export_depth_less.asm
│   ├── test_io_ps_export_depth_less.glsl
│   ├── test_io_ps_export_stencil.asm
│   ├── test_io_ps_export_stencil.glsl
│   ├── test_io_ps_fully_covered.asm
│   ├── test_io_ps_fully_covered.glsl
│   ├── test_io_ps_interpolate_centroid.asm
│   ├── test_io_ps_interpolate_centroid.glsl
│   ├── test_io_ps_interpolate_offset.asm
│   ├── test_io_ps_interpolate_offset.glsl
│   ├── test_io_ps_interpolate_sample.asm
│   ├── test_io_ps_interpolate_sample.glsl
│   ├── test_io_vs.asm
│   ├── test_io_vs.glsl
│   ├── test_io_vs_clip_cull_dist.asm
│   ├── test_io_vs_clip_cull_dist.glsl
│   ├── test_io_vs_clip_dist.asm
│   ├── test_io_vs_clip_dist.glsl
│   ├── test_io_vs_cull_dist.asm
│   ├── test_io_vs_cull_dist.glsl
│   ├── test_io_vs_instance_id.asm
│   ├── test_io_vs_instance_id.glsl
│   ├── test_io_vs_layer.asm
│   ├── test_io_vs_layer.glsl
│   ├── test_io_vs_vertex_id.asm
│   ├── test_io_vs_vertex_id.glsl
│   ├── test_io_vs_viewport.asm
│   ├── test_io_vs_viewport.glsl
│   ├── test_misc_constant_load.asm
│   ├── test_misc_constant_load.glsl
│   ├── test_misc_function.asm
│   ├── test_misc_function.glsl
│   ├── test_misc_function_with_args.asm
│   ├── test_misc_function_with_args.glsl
│   ├── test_misc_function_with_return.asm
│   ├── test_misc_function_with_return.glsl
│   ├── test_misc_function_with_undef.asm
│   ├── test_misc_function_with_undef.glsl
│   ├── test_misc_lds.asm
│   ├── test_misc_lds.glsl
│   ├── test_misc_lds_atomic.asm
│   ├── test_misc_lds_atomic.glsl
│   ├── test_misc_ps_demote.asm
│   ├── test_misc_ps_demote.glsl
│   ├── test_misc_ps_early_z.asm
│   ├── test_misc_ps_early_z.glsl
│   ├── test_misc_scratch.asm
│   ├── test_misc_scratch.glsl
│   ├── test_resource_rov.asm
│   ├── test_resource_rov.glsl
│   ├── test_resource_srv_buffer_load_sparse_feedback.asm
│   ├── test_resource_srv_buffer_load_sparse_feedback.glsl
│   ├── test_resource_srv_image_1d_array_load.asm
│   ├── test_resource_srv_image_1d_array_load.glsl
│   ├── test_resource_srv_image_1d_array_query.asm
│   ├── test_resource_srv_image_1d_array_query.glsl
│   ├── test_resource_srv_image_1d_array_sample.asm
│   ├── test_resource_srv_image_1d_array_sample.glsl
│   ├── test_resource_srv_image_1d_load.asm
│   ├── test_resource_srv_image_1d_load.glsl
│   ├── test_resource_srv_image_1d_query.asm
│   ├── test_resource_srv_image_1d_query.glsl
│   ├── test_resource_srv_image_1d_sample.asm
│   ├── test_resource_srv_image_1d_sample.glsl
│   ├── test_resource_srv_image_2d_array_gather.asm
│   ├── test_resource_srv_image_2d_array_gather.glsl
│   ├── test_resource_srv_image_2d_array_gather_depth.asm
│   ├── test_resource_srv_image_2d_array_gather_depth.glsl
│   ├── test_resource_srv_image_2d_array_load.asm
│   ├── test_resource_srv_image_2d_array_load.glsl
│   ├── test_resource_srv_image_2d_array_query.asm
│   ├── test_resource_srv_image_2d_array_query.glsl
│   ├── test_resource_srv_image_2d_array_sample.asm
│   ├── test_resource_srv_image_2d_array_sample.glsl
│   ├── test_resource_srv_image_2d_array_sample_depth.asm
│   ├── test_resource_srv_image_2d_array_sample_depth.glsl
│   ├── test_resource_srv_image_2d_gather.asm
│   ├── test_resource_srv_image_2d_gather.glsl
│   ├── test_resource_srv_image_2d_gather_depth.asm
│   ├── test_resource_srv_image_2d_gather_depth.glsl
│   ├── test_resource_srv_image_2d_load.asm
│   ├── test_resource_srv_image_2d_load.glsl
│   ├── test_resource_srv_image_2d_ms_array_load.asm
│   ├── test_resource_srv_image_2d_ms_array_load.glsl
│   ├── test_resource_srv_image_2d_ms_array_query.asm
│   ├── test_resource_srv_image_2d_ms_array_query.glsl
│   ├── test_resource_srv_image_2d_ms_load.asm
│   ├── test_resource_srv_image_2d_ms_load.glsl
│   ├── test_resource_srv_image_2d_ms_query.asm
│   ├── test_resource_srv_image_2d_ms_query.glsl
│   ├── test_resource_srv_image_2d_query.asm
│   ├── test_resource_srv_image_2d_query.glsl
│   ├── test_resource_srv_image_2d_sample.asm
│   ├── test_resource_srv_image_2d_sample.glsl
│   ├── test_resource_srv_image_2d_sample_depth.asm
│   ├── test_resource_srv_image_2d_sample_depth.glsl
│   ├── test_resource_srv_image_3d_load.asm
│   ├── test_resource_srv_image_3d_load.glsl
│   ├── test_resource_srv_image_3d_query.asm
│   ├── test_resource_srv_image_3d_query.glsl
│   ├── test_resource_srv_image_3d_sample.asm
│   ├── test_resource_srv_image_3d_sample.glsl
│   ├── test_resource_srv_image_cube_array_gather.asm
│   ├── test_resource_srv_image_cube_array_gather.glsl
│   ├── test_resource_srv_image_cube_array_gather_depth.asm
│   ├── test_resource_srv_image_cube_array_gather_depth.glsl
│   ├── test_resource_srv_image_cube_array_query.asm
│   ├── test_resource_srv_image_cube_array_query.glsl
│   ├── test_resource_srv_image_cube_array_sample.asm
│   ├── test_resource_srv_image_cube_array_sample.glsl
│   ├── test_resource_srv_image_cube_array_sample_depth.asm
│   ├── test_resource_srv_image_cube_array_sample_depth.glsl
│   ├── test_resource_srv_image_cube_gather.asm
│   ├── test_resource_srv_image_cube_gather.glsl
│   ├── test_resource_srv_image_cube_gather_depth.asm
│   ├── test_resource_srv_image_cube_gather_depth.glsl
│   ├── test_resource_srv_image_cube_query.asm
│   ├── test_resource_srv_image_cube_query.glsl
│   ├── test_resource_srv_image_cube_sample.asm
│   ├── test_resource_srv_image_cube_sample.glsl
│   ├── test_resource_srv_image_cube_sample_depth.asm
│   ├── test_resource_srv_image_cube_sample_depth.glsl
│   ├── test_resource_srv_image_gather_depth_sparse_feedback.asm
│   ├── test_resource_srv_image_gather_depth_sparse_feedback.glsl
│   ├── test_resource_srv_image_gather_sparse_feedback.asm
│   ├── test_resource_srv_image_gather_sparse_feedback.glsl
│   ├── test_resource_srv_image_load_sparse_feedback.asm
│   ├── test_resource_srv_image_load_sparse_feedback.glsl
│   ├── test_resource_srv_image_sample_depth_sparse_feedback.asm
│   ├── test_resource_srv_image_sample_depth_sparse_feedback.glsl
│   ├── test_resource_srv_image_sample_sparse_feedback.asm
│   ├── test_resource_srv_image_sample_sparse_feedback.glsl
│   ├── test_resource_srv_indexed_image_1d_array_load.asm
│   ├── test_resource_srv_indexed_image_1d_array_load.glsl
│   ├── test_resource_srv_indexed_image_1d_array_query.asm
│   ├── test_resource_srv_indexed_image_1d_array_query.glsl
│   ├── test_resource_srv_indexed_image_1d_array_sample.asm
│   ├── test_resource_srv_indexed_image_1d_array_sample.glsl
│   ├── test_resource_srv_indexed_image_1d_load.asm
│   ├── test_resource_srv_indexed_image_1d_load.glsl
│   ├── test_resource_srv_indexed_image_1d_query.asm
│   ├── test_resource_srv_indexed_image_1d_query.glsl
│   ├── test_resource_srv_indexed_image_1d_sample.asm
│   ├── test_resource_srv_indexed_image_1d_sample.glsl
│   ├── test_resource_srv_indexed_image_2d_array_gather.asm
│   ├── test_resource_srv_indexed_image_2d_array_gather.glsl
│   ├── test_resource_srv_indexed_image_2d_array_gather_depth.asm
│   ├── test_resource_srv_indexed_image_2d_array_gather_depth.glsl
│   ├── test_resource_srv_indexed_image_2d_array_load.asm
│   ├── test_resource_srv_indexed_image_2d_array_load.glsl
│   ├── test_resource_srv_indexed_image_2d_array_query.asm
│   ├── test_resource_srv_indexed_image_2d_array_query.glsl
│   ├── test_resource_srv_indexed_image_2d_array_sample.asm
│   ├── test_resource_srv_indexed_image_2d_array_sample.glsl
│   ├── test_resource_srv_indexed_image_2d_array_sample_depth.asm
│   ├── test_resource_srv_indexed_image_2d_array_sample_depth.glsl
│   ├── test_resource_srv_indexed_image_2d_gather.asm
│   ├── test_resource_srv_indexed_image_2d_gather.glsl
│   ├── test_resource_srv_indexed_image_2d_gather_depth.asm
│   ├── test_resource_srv_indexed_image_2d_gather_depth.glsl
│   ├── test_resource_srv_indexed_image_2d_load.asm
│   ├── test_resource_srv_indexed_image_2d_load.glsl
│   ├── test_resource_srv_indexed_image_2d_ms_array_load.asm
│   ├── test_resource_srv_indexed_image_2d_ms_array_load.glsl
│   ├── test_resource_srv_indexed_image_2d_ms_array_query.asm
│   ├── test_resource_srv_indexed_image_2d_ms_array_query.glsl
│   ├── test_resource_srv_indexed_image_2d_ms_load.asm
│   ├── test_resource_srv_indexed_image_2d_ms_load.glsl
│   ├── test_resource_srv_indexed_image_2d_ms_query.asm
│   ├── test_resource_srv_indexed_image_2d_ms_query.glsl
│   ├── test_resource_srv_indexed_image_2d_query.asm
│   ├── test_resource_srv_indexed_image_2d_query.glsl
│   ├── test_resource_srv_indexed_image_2d_sample.asm
│   ├── test_resource_srv_indexed_image_2d_sample.glsl
│   ├── test_resource_srv_indexed_image_2d_sample_depth.asm
│   ├── test_resource_srv_indexed_image_2d_sample_depth.glsl
│   ├── test_resource_srv_indexed_image_3d_load.asm
│   ├── test_resource_srv_indexed_image_3d_load.glsl
│   ├── test_resource_srv_indexed_image_3d_query.asm
│   ├── test_resource_srv_indexed_image_3d_query.glsl
│   ├── test_resource_srv_indexed_image_3d_sample.asm
│   ├── test_resource_srv_indexed_image_3d_sample.glsl
│   ├── test_resource_srv_indexed_image_cube_array_gather.asm
│   ├── test_resource_srv_indexed_image_cube_array_gather.glsl
│   ├── test_resource_srv_indexed_image_cube_array_gather_depth.asm
│   ├── test_resource_srv_indexed_image_cube_array_gather_depth.glsl
│   ├── test_resource_srv_indexed_image_cube_array_query.asm
│   ├── test_resource_srv_indexed_image_cube_array_query.glsl
│   ├── test_resource_srv_indexed_image_cube_array_sample.asm
│   ├── test_resource_srv_indexed_image_cube_array_sample.glsl
│   ├── test_resource_srv_indexed_image_cube_array_sample_depth.asm
│   ├── test_resource_srv_indexed_image_cube_array_sample_depth.glsl
│   ├── test_resource_srv_indexed_image_cube_gather.asm
│   ├── test_resource_srv_indexed_image_cube_gather.glsl
│   ├── test_resource_srv_indexed_image_cube_gather_depth.asm
│   ├── test_resource_srv_indexed_image_cube_gather_depth.glsl
│   ├── test_resource_srv_indexed_image_cube_query.asm
│   ├── test_resource_srv_indexed_image_cube_query.glsl
│   ├── test_resource_srv_indexed_image_cube_sample.asm
│   ├── test_resource_srv_indexed_image_cube_sample.glsl
│   ├── test_resource_srv_indexed_image_cube_sample_depth.asm
│   ├── test_resource_srv_indexed_image_cube_sample_depth.glsl
│   ├── test_resource_uav_buffer_load_sparse_feedback.asm
│   ├── test_resource_uav_buffer_load_sparse_feedback.glsl
│   ├── test_resource_uav_counter.asm
│   ├── test_resource_uav_counter.glsl
│   ├── test_resource_uav_counter_indexed.asm
│   ├── test_resource_uav_counter_indexed.glsl
│   ├── test_resource_uav_image_1d_array_atomic.asm
│   ├── test_resource_uav_image_1d_array_atomic.glsl
│   ├── test_resource_uav_image_1d_array_load.asm
│   ├── test_resource_uav_image_1d_array_load.glsl
│   ├── test_resource_uav_image_1d_array_query.asm
│   ├── test_resource_uav_image_1d_array_query.glsl
│   ├── test_resource_uav_image_1d_array_store.asm
│   ├── test_resource_uav_image_1d_array_store.glsl
│   ├── test_resource_uav_image_1d_atomic.asm
│   ├── test_resource_uav_image_1d_atomic.glsl
│   ├── test_resource_uav_image_1d_load.asm
│   ├── test_resource_uav_image_1d_load.glsl
│   ├── test_resource_uav_image_1d_query.asm
│   ├── test_resource_uav_image_1d_query.glsl
│   ├── test_resource_uav_image_1d_store.asm
│   ├── test_resource_uav_image_1d_store.glsl
│   ├── test_resource_uav_image_2d_array_atomic.asm
│   ├── test_resource_uav_image_2d_array_atomic.glsl
│   ├── test_resource_uav_image_2d_array_load.asm
│   ├── test_resource_uav_image_2d_array_load.glsl
│   ├── test_resource_uav_image_2d_array_query.asm
│   ├── test_resource_uav_image_2d_array_query.glsl
│   ├── test_resource_uav_image_2d_array_store.asm
│   ├── test_resource_uav_image_2d_array_store.glsl
│   ├── test_resource_uav_image_2d_atomic.asm
│   ├── test_resource_uav_image_2d_atomic.glsl
│   ├── test_resource_uav_image_2d_load.asm
│   ├── test_resource_uav_image_2d_load.glsl
│   ├── test_resource_uav_image_2d_load_precise.asm
│   ├── test_resource_uav_image_2d_load_precise.glsl
│   ├── test_resource_uav_image_2d_query.asm
│   ├── test_resource_uav_image_2d_query.glsl
│   ├── test_resource_uav_image_2d_store.asm
│   ├── test_resource_uav_image_2d_store.glsl
│   ├── test_resource_uav_image_3d_atomic.asm
│   ├── test_resource_uav_image_3d_atomic.glsl
│   ├── test_resource_uav_image_3d_load.asm
│   ├── test_resource_uav_image_3d_load.glsl
│   ├── test_resource_uav_image_3d_query.asm
│   ├── test_resource_uav_image_3d_query.glsl
│   ├── test_resource_uav_image_3d_store.asm
│   ├── test_resource_uav_image_3d_store.glsl
│   ├── test_resource_uav_image_load_sparse_feedback.asm
│   ├── test_resource_uav_image_load_sparse_feedback.glsl
│   ├── test_resource_uav_indexed_image_1d_array_atomic.asm
│   ├── test_resource_uav_indexed_image_1d_array_atomic.glsl
│   ├── test_resource_uav_indexed_image_1d_array_load.asm
│   ├── test_resource_uav_indexed_image_1d_array_load.glsl
│   ├── test_resource_uav_indexed_image_1d_array_query.asm
│   ├── test_resource_uav_indexed_image_1d_array_query.glsl
│   ├── test_resource_uav_indexed_image_1d_array_store.asm
│   ├── test_resource_uav_indexed_image_1d_array_store.glsl
│   ├── test_resource_uav_indexed_image_1d_atomic.asm
│   ├── test_resource_uav_indexed_image_1d_atomic.glsl
│   ├── test_resource_uav_indexed_image_1d_load.asm
│   ├── test_resource_uav_indexed_image_1d_load.glsl
│   ├── test_resource_uav_indexed_image_1d_query.asm
│   ├── test_resource_uav_indexed_image_1d_query.glsl
│   ├── test_resource_uav_indexed_image_1d_store.asm
│   ├── test_resource_uav_indexed_image_1d_store.glsl
│   ├── test_resource_uav_indexed_image_2d_array_atomic.asm
│   ├── test_resource_uav_indexed_image_2d_array_atomic.glsl
│   ├── test_resource_uav_indexed_image_2d_array_load.asm
│   ├── test_resource_uav_indexed_image_2d_array_load.glsl
│   ├── test_resource_uav_indexed_image_2d_array_query.asm
│   ├── test_resource_uav_indexed_image_2d_array_query.glsl
│   ├── test_resource_uav_indexed_image_2d_array_store.asm
│   ├── test_resource_uav_indexed_image_2d_array_store.glsl
│   ├── test_resource_uav_indexed_image_2d_atomic.asm
│   ├── test_resource_uav_indexed_image_2d_atomic.glsl
│   ├── test_resource_uav_indexed_image_2d_load.asm
│   ├── test_resource_uav_indexed_image_2d_load.glsl
│   ├── test_resource_uav_indexed_image_2d_query.asm
│   ├── test_resource_uav_indexed_image_2d_query.glsl
│   ├── test_resource_uav_indexed_image_2d_store.asm
│   ├── test_resource_uav_indexed_image_2d_store.glsl
│   ├── test_resource_uav_indexed_image_3d_atomic.asm
│   ├── test_resource_uav_indexed_image_3d_atomic.glsl
│   ├── test_resource_uav_indexed_image_3d_load.asm
│   ├── test_resource_uav_indexed_image_3d_load.glsl
│   ├── test_resource_uav_indexed_image_3d_query.asm
│   ├── test_resource_uav_indexed_image_3d_query.glsl
│   ├── test_resource_uav_indexed_image_3d_store.asm
│   ├── test_resource_uav_indexed_image_3d_store.glsl
│   ├── test_resources_cbv.asm
│   ├── test_resources_cbv.glsl
│   ├── test_resources_cbv_dynamic.asm
│   ├── test_resources_cbv_dynamic.glsl
│   ├── test_resources_cbv_indexed.asm
│   ├── test_resources_cbv_indexed.glsl
│   ├── test_resources_cbv_indexed_nonuniform.asm
│   ├── test_resources_cbv_indexed_nonuniform.glsl
│   ├── test_resources_srv_buffer_raw_load.asm
│   ├── test_resources_srv_buffer_raw_load.glsl
│   ├── test_resources_srv_buffer_raw_query.asm
│   ├── test_resources_srv_buffer_raw_query.glsl
│   ├── test_resources_srv_buffer_structured_load.asm
│   ├── test_resources_srv_buffer_structured_load.glsl
│   ├── test_resources_srv_buffer_structured_query.asm
│   ├── test_resources_srv_buffer_structured_query.glsl
│   ├── test_resources_srv_buffer_typed_load.asm
│   ├── test_resources_srv_buffer_typed_load.glsl
│   ├── test_resources_srv_buffer_typed_query.asm
│   ├── test_resources_srv_buffer_typed_query.glsl
│   ├── test_resources_srv_indexed_buffer_raw_load.asm
│   ├── test_resources_srv_indexed_buffer_raw_load.glsl
│   ├── test_resources_srv_indexed_buffer_raw_query.asm
│   ├── test_resources_srv_indexed_buffer_raw_query.glsl
│   ├── test_resources_srv_indexed_buffer_structured_load.asm
│   ├── test_resources_srv_indexed_buffer_structured_load.glsl
│   ├── test_resources_srv_indexed_buffer_structured_query.asm
│   ├── test_resources_srv_indexed_buffer_structured_query.glsl
│   ├── test_resources_srv_indexed_buffer_typed_load.asm
│   ├── test_resources_srv_indexed_buffer_typed_load.glsl
│   ├── test_resources_srv_indexed_buffer_typed_query.asm
│   ├── test_resources_srv_indexed_buffer_typed_query.glsl
│   ├── test_resources_uav_buffer_raw_atomic.asm
│   ├── test_resources_uav_buffer_raw_atomic.glsl
│   ├── test_resources_uav_buffer_raw_load.asm
│   ├── test_resources_uav_buffer_raw_load.glsl
│   ├── test_resources_uav_buffer_raw_load_precise.asm
│   ├── test_resources_uav_buffer_raw_load_precise.glsl
│   ├── test_resources_uav_buffer_raw_query.asm
│   ├── test_resources_uav_buffer_raw_query.glsl
│   ├── test_resources_uav_buffer_raw_store.asm
│   ├── test_resources_uav_buffer_raw_store.glsl
│   ├── test_resources_uav_buffer_structured_atomic.asm
│   ├── test_resources_uav_buffer_structured_atomic.glsl
│   ├── test_resources_uav_buffer_structured_load.asm
│   ├── test_resources_uav_buffer_structured_load.glsl
│   ├── test_resources_uav_buffer_structured_load_precise.asm
│   ├── test_resources_uav_buffer_structured_load_precise.glsl
│   ├── test_resources_uav_buffer_structured_query.asm
│   ├── test_resources_uav_buffer_structured_query.glsl
│   ├── test_resources_uav_buffer_structured_store.asm
│   ├── test_resources_uav_buffer_structured_store.glsl
│   ├── test_resources_uav_buffer_typed_atomic.asm
│   ├── test_resources_uav_buffer_typed_atomic.glsl
│   ├── test_resources_uav_buffer_typed_load.asm
│   ├── test_resources_uav_buffer_typed_load.glsl
│   ├── test_resources_uav_buffer_typed_load_precise.asm
│   ├── test_resources_uav_buffer_typed_load_precise.glsl
│   ├── test_resources_uav_buffer_typed_query.asm
│   ├── test_resources_uav_buffer_typed_query.glsl
│   ├── test_resources_uav_buffer_typed_store.asm
│   ├── test_resources_uav_buffer_typed_store.glsl
│   ├── test_resources_uav_indexed_buffer_raw_atomic.asm
│   ├── test_resources_uav_indexed_buffer_raw_atomic.glsl
│   ├── test_resources_uav_indexed_buffer_raw_load.asm
│   ├── test_resources_uav_indexed_buffer_raw_load.glsl
│   ├── test_resources_uav_indexed_buffer_raw_query.asm
│   ├── test_resources_uav_indexed_buffer_raw_query.glsl
│   ├── test_resources_uav_indexed_buffer_raw_store.asm
│   ├── test_resources_uav_indexed_buffer_raw_store.glsl
│   ├── test_resources_uav_indexed_buffer_structured_atomic.asm
│   ├── test_resources_uav_indexed_buffer_structured_atomic.glsl
│   ├── test_resources_uav_indexed_buffer_structured_load.asm
│   ├── test_resources_uav_indexed_buffer_structured_load.glsl
│   ├── test_resources_uav_indexed_buffer_structured_query.asm
│   ├── test_resources_uav_indexed_buffer_structured_query.glsl
│   ├── test_resources_uav_indexed_buffer_structured_store.asm
│   ├── test_resources_uav_indexed_buffer_structured_store.glsl
│   ├── test_resources_uav_indexed_buffer_typed_atomic.asm
│   ├── test_resources_uav_indexed_buffer_typed_atomic.glsl
│   ├── test_resources_uav_indexed_buffer_typed_load.asm
│   ├── test_resources_uav_indexed_buffer_typed_load.glsl
│   ├── test_resources_uav_indexed_buffer_typed_query.asm
│   ├── test_resources_uav_indexed_buffer_typed_query.glsl
│   ├── test_resources_uav_indexed_buffer_typed_store.asm
│   └── test_resources_uav_indexed_buffer_typed_store.glsl
├── roundtrip_shaders.py
├── scratch_pool.hpp
├── shaders/
│   ├── ags/
│   │   ├── ags.ssbo.comp
│   │   ├── ags_shader_intrinsics_dx12.inc
│   │   ├── cs_constexpr_wmma_gep.sm66.full-wmma.ssbo.comp
│   │   ├── cs_constexpr_wmma_gep.sm66.ssbo.comp
│   │   ├── cs_wmma_alloca.sm66.ssbo.comp
│   │   ├── cs_wmma_copy_transpose_fp16.sm66.ssbo.comp
│   │   ├── cs_wmma_copy_transpose_fp16.sm66.ssbo.full-wmma.comp
│   │   ├── cs_wmma_extract_insert.sm66.ssbo.full-wmma.comp
│   │   ├── cs_wmma_f32_16x16x16_f16_quant_f16.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_f16_quant_f16_at.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_f16_quant_f16_bt.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_f16_quant_f16_ct.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_f16_quant_f16_ot.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_f16_quant_fp8.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_fp8.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_fp8.sm66.ssbo.full-wmma.comp
│   │   ├── cs_wmma_f32_16x16x16_fp8_quant_f16.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_fp8_quant_f16_strided.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_fp8_quant_f16_strided_transpose.sm66.ssbo.comp
│   │   ├── cs_wmma_f32_16x16x16_fp8_quant_f32.sm66.ssbo.comp
│   │   ├── cs_wmma_fp16_fp8_conversions.sm66.ssbo.full-wmma.comp
│   │   ├── cs_wmma_fp32_fp16_conversions.sm66.ssbo.full-wmma.comp
│   │   ├── cs_wmma_fp32_fp8_conversions.sm66.ssbo.nv-coopmat2.comp
│   │   ├── cs_wmma_fp8_fp32_conversions.sm66.ssbo.full-wmma.comp
│   │   ├── cs_wmma_lds_transpose.sm66.ssbo.comp
│   │   ├── cs_wmma_matrix_length.sm66.ssbo.comp
│   │   ├── cs_wmma_store_phi.full-wmma.sm66.ssbo.comp
│   │   ├── cs_wmma_store_phi.sm66.ssbo.comp
│   │   └── wmma_ags.h
│   ├── alloca-opts/
│   │   ├── bad-stride.frag
│   │   ├── double-array-load.frag
│   │   ├── float4-array-load.bindless.frag
│   │   ├── float4-array-load.bindless.root-constants.frag
│   │   ├── float4-array-load.frag
│   │   ├── float4-array-load.root-constant.frag
│   │   ├── float4-array-load.root-descriptor.frag
│   │   ├── float4-array-load.root-descriptor.root-constants.frag
│   │   ├── load-different.frag
│   │   ├── local-root-constants.local-root-signature.rgen
│   │   ├── matrix-load.frag
│   │   ├── missing-first.frag
│   │   ├── missing-last-element.frag
│   │   ├── out-of-order-load.frag
│   │   ├── store-after-load.frag
│   │   └── uint4-array-load.frag
│   ├── asm/
│   │   ├── bfi.bc.dxil
│   │   ├── cbv.no-legacy-cbuf-layout.sm66-heaps-single-alias.bc.dxil
│   │   ├── cbv.no-legacy-cbuf-layout.sm66-heaps.bc.dxil
│   │   ├── constant-struct-aggregate.bc.dxil
│   │   ├── control-flow-multi-break-with-non-idom-loop-header.dxil
│   │   ├── ibfe.bc.dxil
│   │   └── ubfe.bc.dxil
│   ├── auto-barrier/
│   │   ├── complex-loop.auto-group-shared-barrier.comp
│   │   ├── inner-to-inner.auto-group-shared-barrier.comp
│   │   ├── inner-to-outer.auto-group-shared-barrier.comp
│   │   ├── outer-to-inner.auto-group-shared-barrier.comp
│   │   ├── single-block-loop.auto-group-shared-barrier.comp
│   │   └── single-block.auto-group-shared-barrier.comp
│   ├── control-flow/
│   │   ├── branch-return-2.comp
│   │   ├── branch-return.comp
│   │   ├── branch.comp
│   │   ├── conditional-break-into-if-else-if-ladder-2.comp
│   │   ├── conditional-break-into-if-else-if-ladder.comp
│   │   ├── dual-inner-loop-early-return.comp
│   │   ├── if-else-if-into-continue.comp
│   │   ├── inner-loop-early-return.comp
│   │   ├── interleaved-unrolled-loop-breaks.comp
│   │   ├── loop-break-2.comp
│   │   ├── loop-break.comp
│   │   ├── loop-continue-2.comp
│   │   ├── loop-continue-3.comp
│   │   ├── loop-continue.comp
│   │   ├── loop-inside-infinite-loop-2.frag
│   │   ├── loop-inside-infinite-loop.frag
│   │   ├── loop-return.comp
│   │   ├── loop.comp
│   │   ├── nested-loop-break-2.comp
│   │   ├── nested-loop-break.comp
│   │   ├── nested-loop.comp
│   │   ├── selection-merge-split-post-domination.frag
│   │   ├── switch-continue.frag
│   │   ├── switch-merge-into-other-merge.comp
│   │   ├── switch-shared-header-with-loop.comp
│   │   └── wave-size-dependent-loop-unroll.comp
│   ├── descriptor_qa/
│   │   ├── acceleration-structure.bindless.descriptor-qa.rgen
│   │   ├── acceleration-structure.bindless.descriptor-qa.sm66.rgen
│   │   ├── acceleration-structure.bindless.ssbo-rtas.local-root-signature.descriptor-qa.rgen
│   │   ├── descriptor_qa.bindless.cbv-as-ssbo.descriptor-qa.comp
│   │   ├── descriptor_qa.bindless.descriptor-qa.comp
│   │   ├── descriptor_qa.bindless.descriptor-qa.sm66.comp
│   │   ├── descriptor_qa.bindless.ssbo.descriptor-qa.comp
│   │   ├── early-2.bindless.descriptor-qa.frag
│   │   ├── early-3.bindless.descriptor-qa.frag
│   │   ├── early-4.bindless.descriptor-qa.frag
│   │   ├── early-5.bindless.descriptor-qa.frag
│   │   ├── early-heap.descriptor-qa.sm66.frag
│   │   └── early.bindless.descriptor-qa.frag
│   ├── dxil-builtin/
│   │   ├── accept-hit-and-end-search-ignore-hit.rany
│   │   ├── acos.frag
│   │   ├── asin.frag
│   │   ├── atan.frag
│   │   ├── atomic-bin-op.bindless.root-constant.frag
│   │   ├── atomic-bin-op.frag
│   │   ├── atomic-bin-op.root-descriptor.comp
│   │   ├── atomic-bin-op.ssbo.frag
│   │   ├── atomic-compare-exchange.frag
│   │   ├── atomic-compare-exchange.root-descriptor.comp
│   │   ├── atomic-compare-exchange.ssbo.frag
│   │   ├── attributes.denorm-ftz.comp
│   │   ├── attributes.denorm-preserve.comp
│   │   ├── barrier.comp
│   │   ├── barycentrics-2.frag
│   │   ├── barycentrics.frag
│   │   ├── bfrev.frag
│   │   ├── bitcount-bitrev-sizes.ssbo.comp
│   │   ├── buffer-load-feedback.frag
│   │   ├── buffer-load-signed-feedback.frag
│   │   ├── buffer-load-signed.frag
│   │   ├── buffer-load.frag
│   │   ├── buffer-load.ssbo.frag
│   │   ├── buffer-store-signed.frag
│   │   ├── buffer-store.frag
│   │   ├── buffer-store.ssbo.frag
│   │   ├── buffer-update-counter.frag
│   │   ├── calculate-lod.frag
│   │   ├── call-shader.rgen
│   │   ├── clip.demote-to-helper.frag
│   │   ├── clip.frag
│   │   ├── compute-shader-derivatives-cube-array.noderivs.sm66.ssbo.comp
│   │   ├── compute-shader-derivatives-cube.noderivs.sm66.ssbo.comp
│   │   ├── compute-shader-derivatives-single-thread.sm66.ssbo.comp
│   │   ├── compute-shader-derivatives.noderivs.sm66.ssbo.comp
│   │   ├── cos.frag
│   │   ├── countbits.frag
│   │   ├── coverage.frag
│   │   ├── derivative.frag
│   │   ├── derivative.sm60.frag
│   │   ├── derivative.sm60.native-fp16.frag
│   │   ├── derivatives.sm66.comp
│   │   ├── discard.demote-to-helper.frag
│   │   ├── discard.frag
│   │   ├── dispatch-rays-dimensions.rgen
│   │   ├── dispatch-rays-index.rgen
│   │   ├── dot2.frag
│   │   ├── dot3.frag
│   │   ├── dot4.frag
│   │   ├── eval-centroid.frag
│   │   ├── eval-sample-index.frag
│   │   ├── eval-snapped.frag
│   │   ├── exp.frag
│   │   ├── f16-to-f32.frag
│   │   ├── f32-to-f16.frag
│   │   ├── fabs.frag
│   │   ├── firstbithi-16.sm62.frag
│   │   ├── firstbithi-64.frag
│   │   ├── firstbithi.frag
│   │   ├── firstbitlo-16.sm62.frag
│   │   ├── firstbitlo-64.frag
│   │   ├── firstbitlo.frag
│   │   ├── firstbitshi-16.sm62.frag
│   │   ├── firstbitshi-64.frag
│   │   ├── firstbitshi.frag
│   │   ├── flattened_thread_id_in_group.comp
│   │   ├── fma.frag
│   │   ├── fmad-precise.frag
│   │   ├── fmad.frag
│   │   ├── fmax.frag
│   │   ├── fmin.frag
│   │   ├── frc.frag
│   │   ├── get-dimensions-w-only.frag
│   │   ├── get-dimensions-xyz-only.frag
│   │   ├── get-dimensions.bindless.root-constant.frag
│   │   ├── get-dimensions.bindless.root-constant.ssbo.frag
│   │   ├── get-dimensions.frag
│   │   ├── get-dimensions.ssbo.frag
│   │   ├── group_id.comp
│   │   ├── hcos.frag
│   │   ├── hsin.frag
│   │   ├── htan.frag
│   │   ├── imad.frag
│   │   ├── imax.frag
│   │   ├── imin.frag
│   │   ├── instance-id.vert
│   │   ├── is-helper-lane-2.demote-to-helper.sm66.frag
│   │   ├── is-helper-lane-2.sm66.frag
│   │   ├── is-helper-lane.demote-to-helper.sm66.frag
│   │   ├── is-helper-lane.sm66.frag
│   │   ├── isfinite.frag
│   │   ├── isinf.frag
│   │   ├── isnan.frag
│   │   ├── log.frag
│   │   ├── make-double.frag
│   │   ├── msaa-uav.sm67.comp
│   │   ├── msad.comp
│   │   ├── object-ray-direction.rany
│   │   ├── object-ray-origin.rany
│   │   ├── object-to-world-3x4.rany
│   │   ├── object-to-world-4x3.rany
│   │   ├── pack-unpack.ssbo.sm66.comp
│   │   ├── quad-all-any.sm67.comp
│   │   ├── quad-all-any.sm67.quad-maximal-reconvergence.noglsl.comp
│   │   ├── quad-read-at-2d.comp
│   │   ├── quad-read-at-2d.sm66.comp
│   │   ├── quad-read-at.comp
│   │   ├── quad-read-at.frag
│   │   ├── quad-swap.comp
│   │   ├── quad-swap.frag
│   │   ├── raw-gather-offset-sparse.sm67.ssbo.comp
│   │   ├── raw-gather-offset.sm67.ssbo.comp
│   │   ├── raw-gather-sparse.sm67.ssbo.comp
│   │   ├── raw-gather.sm67.ssbo.comp
│   │   ├── ray-query-phi-multi.invalid.sm66.comp
│   │   ├── ray-query-phi-simple.sm66.comp
│   │   ├── ray-query-select-multi.invalid.sm66.comp
│   │   ├── ray-query-select-simple.sm66.comp
│   │   ├── ray-query-store-multi.invalid.sm66.comp
│   │   ├── ray-query-store-simple.sm66.comp
│   │   ├── ray-query.comp
│   │   ├── ray-t-current.rany
│   │   ├── ray-t-min.rany
│   │   ├── render-target-sample-count.frag
│   │   ├── render-target-sample-position.frag
│   │   ├── report-hit.rint
│   │   ├── round-ne.frag
│   │   ├── round-ni.frag
│   │   ├── round-pi.frag
│   │   ├── round-z.frag
│   │   ├── rsqrt.frag
│   │   ├── rt-geometry-index.rany
│   │   ├── rt-hit-kind.rany
│   │   ├── rt-instance-id.rany
│   │   ├── rt-instance-index.rany
│   │   ├── rt-primitive-index.rany
│   │   ├── rt-ray-flags.rany
│   │   ├── sample-bias-feedback.frag
│   │   ├── sample-bias-offset.frag
│   │   ├── sample-bias.frag
│   │   ├── sample-cmp-bias-feedback.frag
│   │   ├── sample-cmp-bias-offset.frag
│   │   ├── sample-cmp-bias.frag
│   │   ├── sample-cmp-feedback.frag
│   │   ├── sample-cmp-grad-offset-feedback.frag
│   │   ├── sample-cmp-grad-offset.frag
│   │   ├── sample-cmp-grad.frag
│   │   ├── sample-cmp-level.sm67.noglsl.frag
│   │   ├── sample-cmp-levelzero.frag
│   │   ├── sample-cmp-offset-levelzero-feedback.frag
│   │   ├── sample-cmp-offset-levelzero.frag
│   │   ├── sample-cmp-offset.frag
│   │   ├── sample-cmp.frag
│   │   ├── sample-grad-offset-dynamic.noglsl.invalid.sm67.frag
│   │   ├── sample-grad-offset-feedback.frag
│   │   ├── sample-grad-offset.frag
│   │   ├── sample-grad.frag
│   │   ├── sample-id.frag
│   │   ├── 
sample-level-offset-feedback.frag │ │ ├── sample-level-offset.frag │ │ ├── sample-level.frag │ │ ├── sample-offset-dynamic.noglsl.invalid.sm67.frag │ │ ├── sample-offset.frag │ │ ├── sample.frag │ │ ├── saturate.frag │ │ ├── sin.frag │ │ ├── sm64-packed-arithmetic.ssbo.comp │ │ ├── sm64-packed-arithmetic.ssbo.i8dot.noglsl.comp │ │ ├── sm64-packed-arithmetic.ssbo.mixed-float-dot-product.noglsl.comp │ │ ├── split-double.frag │ │ ├── sqrt.frag │ │ ├── tan.frag │ │ ├── texture-gather-4offset.frag │ │ ├── texture-gather-cmp-offset-feedback.frag │ │ ├── texture-gather-cmp-offset.frag │ │ ├── texture-gather-cmp.frag │ │ ├── texture-gather-offset.frag │ │ ├── texture-gather-signed-feedback.frag │ │ ├── texture-gather-signed.frag │ │ ├── texture-gather.frag │ │ ├── texture-load-feedback.frag │ │ ├── texture-load-offset-dynamic.sm67.frag │ │ ├── texture-load-offset.frag │ │ ├── texture-load-signed.frag │ │ ├── texture-load.frag │ │ ├── texture-store-signed.frag │ │ ├── texture-store.frag │ │ ├── texture2dms-sample-position.frag │ │ ├── thread_id.comp │ │ ├── thread_id_in_group.comp │ │ ├── trace-ray-flags-2.rgen │ │ ├── trace-ray-flags.rgen │ │ ├── trace-ray.rgen │ │ ├── umad.frag │ │ ├── umax.frag │ │ ├── umin.frag │ │ ├── vertex-id.vert │ │ ├── wave-active-all-true.comp │ │ ├── wave-active-all-true.frag │ │ ├── wave-active-any-true.comp │ │ ├── wave-active-any-true.frag │ │ ├── wave-active-ballot-discard.demote-to-helper.frag │ │ ├── wave-active-ballot-discard.frag │ │ ├── wave-active-ballot.comp │ │ ├── wave-active-ballot.demote-to-helper.frag │ │ ├── wave-active-ballot.frag │ │ ├── wave-active-count-bits.comp │ │ ├── wave-active-count-bits.frag │ │ ├── wave-all-equal.comp │ │ ├── wave-all-equal.frag │ │ ├── wave-get-lane-count.comp │ │ ├── wave-get-lane-index.comp │ │ ├── wave-is-first-lane.comp │ │ ├── wave-is-first-lane.frag │ │ ├── wave-match.comp │ │ ├── wave-match.frag │ │ ├── wave-match.partitioned.noglsl.comp │ │ ├── wave-match.partitioned.noglsl.frag │ │ ├── wave-multi-prefix-count-bits.comp │ │ ├── wave-multi-prefix-count-bits.frag │ │ ├── wave-multi-prefix-op.comp │ │ ├── wave-multi-prefix-op.frag │ │ ├── wave-multi-prefix-op.partitioned.noglsl.comp │ │ ├── wave-multi-prefix-op.partitioned.noglsl.frag │ │ ├── wave-prefix.comp │ │ ├── wave-prefix.frag │ │ ├── wave-read-lane-at-optimizations.comp │ │ ├── wave-read-lane-at.comp │ │ ├── wave-read-lane-first.comp │ │ ├── wave-read-lane-first.frag │ │ ├── wave-reduce-helpers.sm67.frag │ │ ├── wave-reduce-helpers.sm67.quad-maximal-reconvergence.frag │ │ ├── wave-reduce.comp │ │ ├── wave-reduce.frag │ │ ├── wave-size.sm66.comp │ │ ├── world-ray-direction.rany │ │ ├── world-ray-origin.rany │ │ ├── world-to-object-3x4.rany │ │ └── world-to-object-4x3.rany │ ├── fp16/ │ │ ├── saturate.frag │ │ ├── saturate.sm60.frag │ │ └── saturate.sm60.native-fp16.frag │ ├── heap-robustness/ │ │ ├── misc.bindless.heap-raw-va-cbv.sm66.ssbo.comp │ │ ├── misc.bindless.heap-robustness.heap-robustness-cbv.sm66.ssbo.comp │ │ ├── misc.bindless.heap-robustness.sm66.ssbo.comp │ │ ├── misc.bindless.sm66.ssbo.comp │ │ └── misc.heap-robustness.bindless.heap-robustness-cbv.heap-raw-va-cbv.sm66.ssbo.comp │ ├── instrumentation/ │ │ ├── atomics-raw.bindless.bda-instrumentation.ssbo.comp │ │ ├── atomics-raw.root-descriptor.bda-instrumentation.ssbo.comp │ │ ├── atomics-structured-counter.bindless.bda-instrumentation.ssbo.comp │ │ ├── atomics-structured.bindless.bda-instrumentation.ssbo.comp │ │ ├── atomics-structured.root-descriptor.bda-instrumentation.ssbo.comp │ │ ├── 
atomics-typed.bindless.bda-instrumentation.ssbo.comp │ │ ├── cbv.bindless.bda-instrumentation.comp │ │ ├── cbv.root-descriptor.bda-instrumentation.comp │ │ ├── raw.bindless.bda-instrumentation.ssbo.comp │ │ ├── raw.root-descriptor.bda-instrumentation.ssbo.comp │ │ ├── structured.bindless.bda-instrumentation.comp │ │ ├── structured.bindless.bda-instrumentation.ssbo.comp │ │ ├── structured.root-descriptor.bda-instrumentation.ssbo.comp │ │ └── typed.bindless.bda-instrumentation.comp │ ├── llvm-builtin/ │ │ ├── alloca-robustness-cases.extended-robustness.vert │ │ ├── alloca.frag │ │ ├── atomic-bin-op.comp │ │ ├── atomic-compare-exchange.comp │ │ ├── atomic-compare-exchange.sm66.ssbo.comp │ │ ├── bool-to-fp.frag │ │ ├── constant-expression-cast.comp │ │ ├── constant-expression-gep.comp │ │ ├── fadd.frag │ │ ├── fast-mul-div-pair.comp │ │ ├── fcmp_eq.frag │ │ ├── fcmp_ge.frag │ │ ├── fcmp_gt.frag │ │ ├── fcmp_le.frag │ │ ├── fcmp_lt.frag │ │ ├── fcmp_ne.frag │ │ ├── fdiv.frag │ │ ├── fmul.frag │ │ ├── frem.frag │ │ ├── fsub.frag │ │ ├── glitched-integer-width.comp │ │ ├── groupshared.comp │ │ ├── icmp_eq.frag │ │ ├── icmp_ne.frag │ │ ├── icmp_sge.frag │ │ ├── icmp_sgt.frag │ │ ├── icmp_sle.frag │ │ ├── icmp_slt.frag │ │ ├── icmp_uge.frag │ │ ├── icmp_ugt.frag │ │ ├── icmp_ule.frag │ │ ├── icmp_ult.frag │ │ ├── logical-and.frag │ │ ├── logical-equal.frag │ │ ├── logical-not-equal.frag │ │ ├── logical-or.frag │ │ ├── lut.frag │ │ ├── min16-phi.sm60.comp │ │ ├── precise_math.frag │ │ └── zext-bool.frag │ ├── memory-model/ │ │ ├── uav-coherent-promotion.bindless.ssbo.comp │ │ ├── uav-coherent-promotion.root-descriptor.ssbo.comp │ │ ├── uav-coherent-promotion.sm66.bindless.ssbo.comp │ │ ├── uav-coherent-promotion.sm66.ssbo.comp │ │ ├── uav-coherent-promotion.ssbo.comp │ │ ├── uav-coherent.root-descriptor.ssbo.comp │ │ ├── uav-coherent.sm66.ssbo.comp │ │ └── uav-coherent.ssbo.comp │ ├── nvapi/ │ │ ├── bringup.nvapi.ssbo.rgen │ │ ├── get-special-global-timer.nvapi.ssbo.rgen │ │ ├── hit-object.local-root-signature.noglsl.nvapi.ssbo.rgen │ │ ├── nvHLSLExtns.h │ │ ├── nvHLSLExtnsInternal.h │ │ ├── nvShaderExtnEnums.h │ │ ├── ray-query-cluster-id.nvapi.comp │ │ ├── rt-cluster-id.nvapi.rany │ │ └── shuffle.nvapi.ssbo.comp │ ├── opts/ │ │ ├── fp16-fp32-fp16-1.ssbo.comp │ │ ├── sabs.frag │ │ ├── sneg.frag │ │ ├── wave-read-lane-first-heap.sm66.comp │ │ ├── wave-read-lane-first.bindless.local-root-signature.rmiss │ │ ├── wave-read-lane-first.comp │ │ ├── wave-read-lane-first.no-legacy-cbuf-layout.comp │ │ ├── wave-read-lane-first.no-legacy-cbuf-layout.sm60.comp │ │ ├── wave-read-lane-first.sm60.comp │ │ ├── wave-read-lane-first.sm66.comp │ │ ├── wave-read-lane-first.ssbo.comp │ │ ├── wave-read-lane-first.ssbo.rgen │ │ ├── wave-read-lane-first.ssbo.sm60.comp │ │ ├── wave-read-lane-first.ssbo.sm66.comp │ │ └── wave-read-lane-first.ssbo.sm66.rgen │ ├── raw-access/ │ │ ├── bab-double1.raw-access-chains.noglsl.ssbo.comp │ │ ├── bab-double2.raw-access-chains.noglsl.ssbo.comp │ │ ├── bab-double3.raw-access-chains.noglsl.ssbo.comp │ │ ├── bab-double4.raw-access-chains.noglsl.ssbo.comp │ │ ├── bab-float1.raw-access-chains.noglsl.ssbo.comp │ │ ├── bab-float2.raw-access-chains.noglsl.ssbo.comp │ │ ├── bab-float3.raw-access-chains.noglsl.ssbo.comp │ │ ├── bab-float4.raw-access-chains.noglsl.ssbo.comp │ │ ├── bab-float4x4.raw-access-chains.noglsl.ssbo.comp │ │ ├── structured-float1.raw-access-chains.noglsl.ssbo.comp │ │ ├── structured-float2.raw-access-chains.noglsl.ssbo.comp │ │ ├── 
structured-float3.raw-access-chains.noglsl.ssbo.comp │ │ ├── structured-float4.raw-access-chains.noglsl.ssbo.comp │ │ ├── structured-float4x4.raw-access-chains.noglsl.ssbo.comp │ │ ├── structured-half1.raw-access-chains.noglsl.ssbo.comp │ │ ├── structured-min16float1.raw-access-chains.noglsl.ssbo.sm60.comp │ │ ├── structured-min16float1.raw-access-chains.noglsl.ssbo.sm60.native-fp16.comp │ │ ├── structured-min16uint1.raw-access-chains.noglsl.ssbo.sm60.comp │ │ ├── structured-min16uint1.raw-access-chains.noglsl.ssbo.sm60.native-fp16.comp │ │ ├── structured-uint1.raw-access-chains.noglsl.ssbo.comp │ │ ├── structured-uint2.raw-access-chains.noglsl.ssbo.comp │ │ ├── structured-uint3.raw-access-chains.noglsl.ssbo.comp │ │ └── structured-uint4.raw-access-chains.noglsl.ssbo.comp │ ├── resources/ │ │ ├── acceleration-structure.bindless.rgen │ │ ├── acceleration-structure.bindless.ssbo-rtas.local-root-signature.raw-va-stride-offset.rgen │ │ ├── acceleration-structure.bindless.ssbo-rtas.local-root-signature.rgen │ │ ├── acceleration-structure.local-root-signature.root-descriptor.rgen │ │ ├── basic.input-attachment.frag │ │ ├── buffer-16bit.ssbo.bindless.comp │ │ ├── buffer-16bit.ssbo.bindless.ssbo-align.comp │ │ ├── buffer-16bit.ssbo.comp │ │ ├── buffer-64bit.ssbo.bindless.ssbo-align.comp │ │ ├── buffer-64bit.ssbo.comp │ │ ├── buffer-alignment-fixup.bindless.root-constant.offset-layout.typed-buffer-offset.comp │ │ ├── buffer-alignment-fixup.ssbo.ssbo-align.bindless.root-constant.comp │ │ ├── buffer-alignment-fixup.ssbo.ssbo-align.bindless.root-constant.offset-layout.typed-buffer-offset.comp │ │ ├── cbuf.root-constant.min16float.sm60.frag │ │ ├── cbuf.root-constant.min16float.sm60.native-fp16.frag │ │ ├── cbuf.root-constant.min16int.sm60.frag │ │ ├── cbuf.root-constant.min16int.sm60.native-fp16.frag │ │ ├── cbv-array-nonuniform.frag │ │ ├── cbv-array.frag │ │ ├── cbv-dynamic.no-legacy-cbuf-layout.local-root-signature.rmiss │ │ ├── cbv-indexing.frag │ │ ├── cbv-indexing.sm66.frag │ │ ├── cbv-legacy-fp16-fp64.frag │ │ ├── cbv-legacy-fp16-fp64.root-descriptor.frag │ │ ├── cbv-legacy-fp16-fp64.root-descriptor.sm60.frag │ │ ├── cbv-legacy-fp16-fp64.root-descriptor.sm60.native-fp16.frag │ │ ├── cbv-legacy-fp16-fp64.sm60.frag │ │ ├── cbv-legacy-fp16-fp64.sm60.native-fp16.frag │ │ ├── cbv.bindless.root-constant.cbv-as-ssbo.frag │ │ ├── cbv.bindless.root-constant.frag │ │ ├── cbv.frag │ │ ├── cbv.no-legacy-cbuf-layout.bindless.frag │ │ ├── cbv.no-legacy-cbuf-layout.index-divider.frag │ │ ├── cbv.no-legacy-cbuf-layout.local-root-signature.rmiss │ │ ├── cbv.no-legacy-cbuf-layout.native-fp16.sm60.frag │ │ ├── cbv.no-legacy-cbuf-layout.root-constant.frag │ │ ├── cbv.root-constant.frag │ │ ├── cbv.root-descriptor.no-legacy-cbuf-layout.frag │ │ ├── combined-image-sampler-reuse.frag │ │ ├── dynamic-root-constant.root-constant.bindless.root-descriptor.comp │ │ ├── min16-alloca-groupshared.sm60.comp │ │ ├── min16float-ssbo-dxr.ssbo.rgen │ │ ├── raw-buffer-addressing.comp │ │ ├── raw-buffer-addressing.ssbo.comp │ │ ├── root-bda.root-descriptor.comp │ │ ├── root-bda.root-descriptor.sm60.comp │ │ ├── root-constant-with-bda.root-descriptor.root-constant.comp │ │ ├── rt-resources.bindless.local-root-signature.rmiss │ │ ├── rt-resources.bindless.rmiss │ │ ├── rt-resources.rmiss │ │ ├── sampler-array.frag │ │ ├── sampler-indexing.frag │ │ ├── sampler-indexing.sm66.frag │ │ ├── sampler.bindless.root-constant.frag │ │ ├── sm66/ │ │ │ ├── atomics-64bit-groupshared.ssbo.sm66.comp │ │ │ ├── atomics-64bit.root-descriptor.sm66.comp 
│ │ │ ├── atomics-64bit.ssbo.sm66.comp │ │ │ ├── atomics-component-alias.sm66.comp │ │ │ ├── atomics-typed-64bit-heap.sm66.comp │ │ │ ├── atomics-typed-64bit.bindless.sm66.comp │ │ │ ├── atomics-typed-64bit.sm66.comp │ │ │ ├── binding-range-selection.bindless.sm66.comp │ │ │ ├── binding-range-selection.sm66.comp │ │ │ ├── buffer-64bit-double.ssbo.sm66.comp │ │ │ ├── buffer-64bit.ssbo.sm66.comp │ │ │ ├── buffer-64bit.ssbo.ssbo-align.sm66.comp │ │ │ ├── cbuffer-heap.sm66.frag │ │ │ ├── cbv.no-legacy-cbuf-layout.bindless.sm66.frag │ │ │ ├── cbv.no-legacy-cbuf-layout.sm66.frag │ │ │ ├── raw-buffer-heap.sm66.frag │ │ │ ├── raw-buffer-heap.ssbo.sm66.frag │ │ │ ├── raw-buffer-heap.typed-buffer-offset.sm66.frag │ │ │ ├── raw-buffers-binding.ssbo.bindless.sm66.frag │ │ │ ├── raw-buffers-binding.ssbo.bindless.ssbo-align.sm66.frag │ │ │ ├── raygen-heap.sm66.rgen │ │ │ ├── raygen-heap.ssbo-rtas.raw-va-stride-offset.sm66.rgen │ │ │ ├── raygen-heap.ssbo-rtas.sm66.rgen │ │ │ ├── raygen.sm66.rgen │ │ │ ├── raygen.ssbo-rtas.bindless.raw-va-stride-offset.sm66.rgen │ │ │ ├── raygen.ssbo-rtas.bindless.sm66.rgen │ │ │ ├── rw-typed-binding.sm66.frag │ │ │ ├── rw-typed-heap.sm66.frag │ │ │ ├── sampled-types-binding.sm66.frag │ │ │ ├── sampled-types.sm66.frag │ │ │ ├── sampler-binding.sm66.frag │ │ │ ├── sampler-heap.sm66.frag │ │ │ ├── structured-16bit-heap.ssbo.sm66.frag │ │ │ ├── structured-16bit-heap.ssbo.ssbo-align.sm66.frag │ │ │ ├── structured-buffer-heap.sm66.frag │ │ │ ├── structured-buffer-heap.ssbo.sm66.frag │ │ │ ├── structured-buffer-heap.ssbo.ssbo-align.sm66.frag │ │ │ └── structured-buffer-heap.typed-buffer-offset.sm66.frag │ │ ├── srv-array-raw-buffer-nonuniform.frag │ │ ├── srv-array-raw-buffer.frag │ │ ├── srv-array-structured-buffer-nonuniform.frag │ │ ├── srv-array-structured-buffer.frag │ │ ├── srv-array-texture-nonuniform.frag │ │ ├── srv-array-texture.frag │ │ ├── srv-array-typed-buffer-nonuniform.frag │ │ ├── srv-array-typed-buffer.frag │ │ ├── srv-indexing.frag │ │ ├── srv-indexing.sm66.frag │ │ ├── srv-raw-buffer.bindless.root-constant.frag │ │ ├── srv-raw-buffer.bindless.root-constant.ssbo.frag │ │ ├── srv-raw-buffer.ssbo.frag │ │ ├── srv-structured-buffer.bindless.root-constant.frag │ │ ├── srv-structured-buffer.bindless.root-constant.ssbo.frag │ │ ├── srv-structured-buffer.ssbo.frag │ │ ├── srv-texture.bindless.root-constant.frag │ │ ├── srv-texture.bindless.root-constant.inline-ubo.frag │ │ ├── srv-typed-buffer.bindless.root-constant.frag │ │ ├── srv-uav-raw.typed-buffer-offset.comp │ │ ├── srv-uav.typed-buffer-offset.comp │ │ ├── ssbo-minprecision.sm60.native-fp16.frag │ │ ├── ssbo-minprecision.sm60.ssbo.frag │ │ ├── ssbo-minprecision.sm60.ssbo.native-fp16.frag │ │ ├── ssbo-minprecision.sm60.ssbo.native-fp16.root-descriptor.frag │ │ ├── ssbo-minprecision.sm60.ssbo.root-descriptor.frag │ │ ├── subobject-parsing.rgen │ │ ├── typed-resources-16bit-sparse.frag │ │ ├── typed-resources-16bit.bindless.frag │ │ ├── typed-resources-16bit.frag │ │ ├── typed-resources-16bit.sm60.bindless.frag │ │ ├── typed-resources-16bit.sm60.frag │ │ ├── typed-resources-16bit.sm60.native-fp16.bindless.frag │ │ ├── typed-resources-16bit.sm60.native-fp16.frag │ │ ├── uav-array-raw-buffer-nonuniform.frag │ │ ├── uav-array-raw-buffer.frag │ │ ├── uav-array-structured-buffer-nonuniform.frag │ │ ├── uav-array-structured-buffer-nonuniform.ssbo.bindless.root-constant.frag │ │ ├── uav-array-structured-buffer-nonuniform.ssbo.frag │ │ ├── uav-array-structured-buffer.frag │ │ ├── uav-array-texture-nonuniform.frag │ │ 
├── uav-array-texture.frag │ │ ├── uav-array-typed-buffer-nonuniform.frag │ │ ├── uav-array-typed-buffer.frag │ │ ├── uav-counter-array.ssbo.frag │ │ ├── uav-counter-array.ssbo.sm66.frag │ │ ├── uav-counter-array.ssbo.sm66.uav-counter-ssbo.frag │ │ ├── uav-counter-array.ssbo.uav-counter-ssbo.frag │ │ ├── uav-counter-heap.sm66.bindless.ssbo.frag │ │ ├── uav-counter-heap.sm66.uav-counter-ssbo.bindless.ssbo.frag │ │ ├── uav-counter-heap.sm66.uav-counter-texel-buffer.bindless.ssbo.frag │ │ ├── uav-counter.bindless.nobda.root-constant.comp │ │ ├── uav-counter.bindless.nobda.root-constant.raw-va-stride-offset.comp │ │ ├── uav-counter.bindless.root-constant.comp │ │ ├── uav-counter.bindless.root-constant.raw-va-stride-offset.comp │ │ ├── uav-counter.bindless.root-constant.raw-va-stride-offset.heap-raw-va-cbv.comp │ │ ├── uav-counter.bindless.root-constant.uav-counter-ssbo.comp │ │ ├── uav-counter.ssbo.comp │ │ ├── uav-counter.ssbo.raw-va-stride-offset.comp │ │ ├── uav-counter.ssbo.uav-counter-ssbo.comp │ │ ├── uav-indexing.frag │ │ ├── uav-indexing.sm66.frag │ │ ├── uav-raw-buffer.bindless.root-constant.frag │ │ ├── uav-raw-buffer.ssbo.frag │ │ ├── uav-structured-buffer.bindless.root-constant.frag │ │ ├── uav-typed-buffer.bindless.root-constant.frag │ │ └── uav-typed.typed-uav-without-format.comp │ ├── rov/ │ │ ├── rov-bab.bindless.frag │ │ ├── rov-bab.frag │ │ ├── rov-bab.ssbo.bindless.frag │ │ ├── rov-bab.ssbo.frag │ │ ├── rov-bab.ssbo.root-descriptor.frag │ │ ├── rov-branch-early-return.frag │ │ ├── rov-branch.frag │ │ ├── rov-buffer.frag │ │ ├── rov-inloop-2.frag │ │ ├── rov-inloop.frag │ │ ├── rov-per-sample.sm66.frag │ │ ├── rov-postloop.frag │ │ ├── rov-structured.bindless.frag │ │ ├── rov-structured.frag │ │ ├── rov-structured.ssbo.bindless.frag │ │ ├── rov-structured.ssbo.frag │ │ ├── rov-structured.ssbo.root-descriptor.frag │ │ ├── rov-tex1d.bindless.frag │ │ ├── rov-tex1d.frag │ │ ├── rov-tex1darray.bindless.frag │ │ ├── rov-tex1darray.frag │ │ ├── rov-tex2d.bindless.frag │ │ ├── rov-tex2d.frag │ │ ├── rov-tex2darray.bindless.frag │ │ ├── rov-tex2darray.frag │ │ ├── rov-tex3d.bindless.frag │ │ ├── rov-tex3d.frag │ │ ├── rov-undef.frag │ │ └── rov.sm66.frag │ ├── sampler-feedback/ │ │ ├── sampler-feedback.frag │ │ └── sampler-feedback.sm66.frag │ ├── semantics/ │ │ ├── clip-cull-distance.vert │ │ ├── clip-cull.frag │ │ ├── clip-distance-cols.frag │ │ ├── clip-distance-cols.vert │ │ ├── clip-distance-flatten.frag │ │ ├── clip-distance-flatten.vert │ │ ├── clip-distance-rows.frag │ │ ├── clip-distance-rows.vert │ │ ├── clip-distance-single.vert │ │ ├── coverage.frag │ │ ├── depth-greater-equal.frag │ │ ├── depth-less-equal.frag │ │ ├── depth.frag │ │ ├── early-depth-stencil.frag │ │ ├── inner-coverage.noglsl.frag │ │ ├── is-front-face.frag │ │ ├── position.frag │ │ ├── primitive-id.frag │ │ ├── primitive-id.geom │ │ ├── render-target-array-index.frag │ │ ├── render-target-array-index.geom │ │ ├── sample-rate-pos.frag │ │ ├── stencil-ref.frag │ │ ├── sv-shading-rate.noglsl.frag │ │ ├── sv-shading-rate.noglsl.vert │ │ ├── view-id.frag │ │ ├── view-id.vert │ │ ├── viewport-array-index.frag │ │ └── viewport-array-index.geom │ ├── stages/ │ │ ├── boolean-io.vert │ │ ├── callable-chain.rcall │ │ ├── callable.rcall │ │ ├── closesthit.rclosest │ │ ├── domain-clip-cull.tese │ │ ├── domain-patch-input-integer-io.tese │ │ ├── domain.tese │ │ ├── extra_output.dual-source-blending.frag │ │ ├── extra_output_reordered.dual-source-blending.frag │ │ ├── geometry-clip-cull.geom │ │ ├── 
geometry-input-line.geom │ │ ├── geometry-input-lineadj.geom │ │ ├── geometry-input-point.geom │ │ ├── geometry-input-triangle.geom │ │ ├── geometry-input-triangleadj.geom │ │ ├── geometry-instancing.geom │ │ ├── geometry-output-line.geom │ │ ├── geometry-output-point.geom │ │ ├── geometry-streams.geom │ │ ├── hull-arrays.tesc │ │ ├── hull-clip-cull.tesc │ │ ├── hull-patch-output-integer-io.tesc │ │ ├── hull-single-cp.tesc │ │ ├── hull.tesc │ │ ├── mesh-basic-line.mesh │ │ ├── mesh-basic.mesh │ │ ├── mesh-clip-cull.mesh │ │ ├── raygen-complex-storage-class.rgen │ │ ├── raygen-skip-inactive-resources.rgen │ │ ├── raygen.rgen │ │ ├── raymiss-chain.rmiss │ │ ├── raymiss.rmiss │ │ ├── simple.dual-source-blending.frag │ │ ├── simple.invariant.vert │ │ ├── stage-input-output.16bit-io.frag │ │ ├── stage-input-output.frag │ │ ├── stream-out.stream-out.vert │ │ ├── swizzle.rt-swizzle.frag │ │ ├── task-basic.task │ │ ├── vertex-array-input.vert │ │ ├── vertex-array-output.vert │ │ └── vertex-input-remapping.vert │ ├── vectorization/ │ │ ├── copy-byte-address.ssbo.comp │ │ ├── copy-composite-2.ssbo.comp │ │ ├── copy-composite.ssbo.comp │ │ ├── copy-composite.ssbo.ssbo-align.bindless.comp │ │ ├── copy-double2.ssbo.comp │ │ ├── copy-double2.ssbo.ssbo-align.bindless.comp │ │ ├── copy-double3.ssbo.comp │ │ ├── copy-double3.ssbo.ssbo-align.bindless.comp │ │ ├── copy-double4.ssbo.comp │ │ ├── copy-float2.ssbo.comp │ │ ├── copy-float2.ssbo.ssbo-align.bindless.comp │ │ ├── copy-float2x2.ssbo.comp │ │ ├── copy-float3.ssbo.comp │ │ ├── copy-float3.ssbo.ssbo-align.bindless.comp │ │ ├── copy-float4x4.ssbo.comp │ │ ├── copy-half2.ssbo.comp │ │ ├── copy-half2.ssbo.ssbo-align.bindless.comp │ │ ├── copy-half3.ssbo.comp │ │ ├── copy-half3.ssbo.ssbo-align.bindless.comp │ │ ├── copy-half4.ssbo.comp │ │ └── copy-half4.ssbo.ssbo-align.bindless.comp │ ├── view-instancing/ │ │ ├── geom/ │ │ │ ├── basic.view-instancing.last-pre-raster.geom │ │ │ ├── basic.view-instancing.last-pre-raster.view-instance-mask.geom │ │ │ ├── basic.view-instancing.view-instancing-multiview.last-pre-raster.geom │ │ │ ├── basic.view-instancing.view-instancing-multiview.view-instancing-viewport-offset.last-pre-raster.geom │ │ │ └── basic.view-instancing.view-instancing-viewport-offset.last-pre-raster.geom │ │ ├── mesh/ │ │ │ ├── basic-export-viewport-layer.view-instancing.last-pre-raster.mesh │ │ │ ├── basic-export-viewport-layer.view-instancing.last-pre-raster.view-instancing-viewport-offset.mesh │ │ │ ├── basic-few-thread.view-instancing.last-pre-raster.mesh │ │ │ ├── basic-many-thread.view-instancing.last-pre-raster.mesh │ │ │ ├── basic.view-instancing.last-pre-raster.mesh │ │ │ ├── basic.view-instancing.last-pre-raster.view-instance-mask.mesh │ │ │ ├── basic.view-instancing.last-pre-raster.view-instancing-viewport-offset.mesh │ │ │ └── basic.view-instancing.mesh │ │ ├── tesc/ │ │ │ ├── basic.view-instancing.tesc │ │ │ └── basic.view-instancing.view-instancing-multiview.tesc │ │ ├── tese/ │ │ │ ├── domain-export-layer-viewport.view-instancing.view-instancing-multiview.last-pre-raster.tese │ │ │ ├── domain-export-layer-viewport.view-instancing.view-instancing-multiview.tese │ │ │ ├── domain.view-instancing.last-pre-raster.tese │ │ │ ├── domain.view-instancing.tese │ │ │ ├── domain.view-instancing.view-instancing-multiview.last-pre-raster.tese │ │ │ ├── domain.view-instancing.view-instancing-multiview.tese │ │ │ ├── domain.view-instancing.view-instancing-viewport-offset.last-pre-raster.tese │ │ │ └── 
domain.view-instancing.view-instancing-viewport-offset.tese │ │ └── vert/ │ │ ├── basic.view-instancing.export-layer-viewport.last-pre-raster.vert │ │ ├── basic.view-instancing.export-layer-viewport.vert │ │ ├── basic.view-instancing.export-layer-viewport.view-instancing-multiview.vert │ │ ├── basic.view-instancing.export-layer-viewport.view-instancing-viewport-offset.vert │ │ ├── basic.view-instancing.export-layer.last-pre-raster.vert │ │ ├── basic.view-instancing.export-layer.vert │ │ ├── basic.view-instancing.export-layer.view-instancing-multiview.vert │ │ ├── basic.view-instancing.export-layer.view-instancing-viewport-offset.vert │ │ ├── basic.view-instancing.export-viewport.last-pre-raster.vert │ │ ├── basic.view-instancing.export-viewport.vert │ │ ├── basic.view-instancing.export-viewport.view-instancing-multiview.vert │ │ ├── basic.view-instancing.export-viewport.view-instancing-viewport-offset.vert │ │ ├── basic.view-instancing.last-pre-raster.vert │ │ ├── basic.view-instancing.vert │ │ ├── basic.view-instancing.view-instance-mask.last-pre-raster.vert │ │ ├── basic.view-instancing.view-instance-mask.vert │ │ ├── basic.view-instancing.view-instancing-multiview.vert │ │ └── basic.view-instancing.view-instancing-viewport-offset.vert │ └── vkmm/ │ ├── coopmat.sm66.ssbo.vkmm.comp │ ├── cross_group_sharing.vkmm.node.inline-ubo.comp │ ├── descriptor_qa.bindless.descriptor-qa.vkmm.comp │ ├── groupshared.vkmm.comp │ ├── hull.vkmm.tesc │ ├── image-load-store.vkmm.comp │ ├── image-load-store.vkmm.sm66.comp │ ├── memory-model/ │ │ ├── uav-coherent-promotion.bindless.ssbo.vkmm.comp │ │ ├── uav-coherent-promotion.root-descriptor.ssbo.vkmm.comp │ │ ├── uav-coherent-promotion.sm66.bindless.ssbo.vkmm.comp │ │ ├── uav-coherent-promotion.sm66.ssbo.vkmm.comp │ │ ├── uav-coherent-promotion.ssbo.vkmm.comp │ │ ├── uav-coherent.root-descriptor.ssbo.vkmm.comp │ │ ├── uav-coherent.sm66.ssbo.vkmm.comp │ │ └── uav-coherent.ssbo.vkmm.comp │ ├── report-hit.vkmm.rint │ ├── rov-structured.vkmm.frag │ ├── rov-tex2d.vkmm.frag │ └── wmma_ags.h ├── show_graph.py ├── spirv_module.cpp ├── spirv_module.hpp ├── spirv_module_instrumentation.cpp ├── spirv_module_instrumentation.hpp ├── test_shaders.py ├── third_party/ │ ├── CMakeLists.txt │ ├── bc-decoder/ │ │ ├── llvm_bitreader.h │ │ ├── llvm_decoder.cpp │ │ └── llvm_decoder.h │ ├── cli_parser/ │ │ ├── cli_parser.cpp │ │ └── cli_parser.hpp │ └── glslang-spirv/ │ ├── InReadableOrder.cpp │ ├── Logger.cpp │ ├── Logger.h │ ├── SpvBuilder.cpp │ ├── SpvBuilder.h │ └── spvIR.h └── util/ ├── thread_local_allocator.cpp └── thread_local_allocator.hpp ================================================ FILE CONTENTS ================================================ ================================================ FILE: .clang-format ================================================ # The style used for all options not specifically set in the configuration. BasedOnStyle: LLVM # The extra indent or outdent of access modifiers, e.g. public:. AccessModifierOffset: -4 # If true, aligns escaped newlines as far left as possible. Otherwise puts them into the right-most column. AlignEscapedNewlinesLeft: true # If true, aligns trailing comments. AlignTrailingComments: false # Allow putting all parameters of a function declaration onto the next line even if BinPackParameters is false. AllowAllParametersOfDeclarationOnNextLine: false # Allows contracting simple braced statements to a single line. AllowShortBlocksOnASingleLine: false # If true, short case labels will be contracted to a single line. 
AllowShortCaseLabelsOnASingleLine: false # Dependent on the value, int f() { return 0; } can be put on a single line. Possible values: None, Inline, All. AllowShortFunctionsOnASingleLine: None # If true, if (a) return; can be put on a single line. AllowShortIfStatementsOnASingleLine: false # If true, while (true) continue; can be put on a single line. AllowShortLoopsOnASingleLine: false # If true, always break after function definition return types. AlwaysBreakAfterDefinitionReturnType: false # If true, always break before multiline string literals. AlwaysBreakBeforeMultilineStrings: false # If true, always break after the template<...> of a template declaration. AlwaysBreakTemplateDeclarations: true # If false, a function call's arguments will either be all on the same line or will have one line each. BinPackArguments: true # If false, a function declaration's or function definition's parameters will either all be on the same line # or will have one line each. BinPackParameters: true # The way to wrap binary operators. Possible values: None, NonAssignment, All. BreakBeforeBinaryOperators: None # The brace breaking style to use. Possible values: Attach, Linux, Stroustrup, Allman, GNU. BreakBeforeBraces: Allman # If true, ternary operators will be placed after line breaks. BreakBeforeTernaryOperators: false # Always break constructor initializers before commas and align the commas with the colon. BreakConstructorInitializersBeforeComma: true # The column limit. A column limit of 0 means that there is no column limit. ColumnLimit: 120 # A regular expression that describes comments with special meaning, which should not be split into lines or otherwise changed. CommentPragmas: '^ *' # If the constructor initializers don't fit on a line, put each initializer on its own line. ConstructorInitializerAllOnOneLineOrOnePerLine: false # The number of characters to use for indentation of constructor initializer lists. ConstructorInitializerIndentWidth: 4 # Indent width for line continuations. ContinuationIndentWidth: 4 # If true, format braced lists as best suited for C++11 braced lists. Cpp11BracedListStyle: false # Disables formatting at all. DisableFormat: false # A vector of macros that should be interpreted as foreach loops instead of as function calls. #ForEachMacros: '' # Indent case labels one level from the switch statement. # When false, use the same indentation level as for the switch statement. # Switch statement body is always indented one level more than case labels. IndentCaseLabels: false # The number of columns to use for indentation. IndentWidth: 4 # Indent if a function definition or declaration is wrapped after the type. IndentWrappedFunctionNames: false # If true, empty lines at the start of blocks are kept. KeepEmptyLinesAtTheStartOfBlocks: true # Language, this format style is targeted at. Possible values: None, Cpp, Java, JavaScript, Proto. Language: Cpp # The maximum number of consecutive empty lines to keep. MaxEmptyLinesToKeep: 1 # The indentation used for namespaces. Possible values: None, Inner, All. NamespaceIndentation: None # The penalty for breaking a function call after "call(". PenaltyBreakBeforeFirstCallParameter: 19 # The penalty for each line break introduced inside a comment. PenaltyBreakComment: 300 # The penalty for breaking before the first <<. PenaltyBreakFirstLessLess: 120 # The penalty for each line break introduced inside a string literal. PenaltyBreakString: 1000 # The penalty for each character outside of the column limit. 
PenaltyExcessCharacter: 1000000 # Penalty for putting the return type of a function onto its own line. PenaltyReturnTypeOnItsOwnLine: 1000000000 # Pointer and reference alignment style. Possible values: Left, Right, Middle. PointerAlignment: Right # If true, a space may be inserted after C style casts. SpaceAfterCStyleCast: false # If false, spaces will be removed before assignment operators. SpaceBeforeAssignmentOperators: true # Defines in which cases to put a space before opening parentheses. Possible values: Never, ControlStatements, Always. SpaceBeforeParens: ControlStatements # If true, spaces may be inserted into '()'. SpaceInEmptyParentheses: false # The number of spaces before trailing line comments (// - comments). SpacesBeforeTrailingComments: 1 # If true, spaces will be inserted after '<' and before '>' in template argument lists. SpacesInAngles: false # If true, spaces may be inserted into C style casts. SpacesInCStyleCastParentheses: false # If true, spaces are inserted inside container literals (e.g. ObjC and Javascript array and dict literals). SpacesInContainerLiterals: false # If true, spaces will be inserted after '(' and before ')'. SpacesInParentheses: false # If true, spaces will be inserted after '[' and before ']'. SpacesInSquareBrackets: false # Format compatible with this standard, e.g. use A<A<int> > instead of A<A<int>> for LS_Cpp03. Possible values: Cpp03, Cpp11, Auto. Standard: Cpp11 # The number of columns used for tab stops. TabWidth: 4 # The way to use tab characters in the resulting file. Possible values: Never, ForIndentation, Always. UseTab: ForIndentation # Do not reflow comments ReflowComments: false ================================================ FILE: .gitattributes ================================================ shaders/**/* linguist-language=GLSL reference/**/* linguist-generated ================================================ FILE: .gitignore ================================================ /cmake-build-debug /cmake-build-release *.iml /.idea /.vs /.vscode /external/dxc* /external/DirectXShaderCompiler /external/llvm /shaders-dxil /shaders-dxbc /reference/shaders-dxil /reference/shaders-dxbc /build /out/build ================================================ FILE: .gitmodules ================================================ [submodule "third_party/spirv-headers"] path = third_party/spirv-headers url = https://github.com/KhronosGroup/SPIRV-Headers [submodule "third_party/SPIRV-Tools"] path = third_party/SPIRV-Tools url = https://github.com/KhronosGroup/SPIRV-Tools [submodule "third_party/SPIRV-Cross"] path = third_party/SPIRV-Cross url = https://github.com/KhronosGroup/SPIRV-Cross [submodule "third_party/dxbc-spirv"] path = subprojects/dxbc-spirv url = https://github.com/doitsujin/dxbc-spirv ================================================ FILE: CMakeLists.txt ================================================ # # Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation # # SPDX-License-Identifier: MIT # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial
portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. cmake_minimum_required(VERSION 3.10) set(CMAKE_CXX_STANDARD 14) set(CMAKE_C_STANDARD 99) project(dxil-spirv LANGUAGES CXX C) add_library(dxil-debug STATIC debug/logging.hpp debug/logging.cpp) target_include_directories(dxil-debug PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/debug) set_target_properties(dxil-debug PROPERTIES POSITION_INDEPENDENT_CODE ON) option(DXIL_SPIRV_CLI "Enable CLI support." ON) option(DXIL_SPIRV_NATIVE_LLVM "Enable native LLVM support." OFF) include(GNUInstallDirs) if (CMAKE_COMPILER_IS_GNUCXX OR (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")) set(DXIL_SPV_CXX_FLAGS -Wall -Wextra -Wno-missing-field-initializers -Wno-empty-body -Wno-unused-parameter -fno-exceptions -fno-rtti -fvisibility=hidden) elseif (MSVC) set(DXIL_SPV_CXX_FLAGS /D_CRT_SECURE_NO_WARNINGS /wd4996 /wd4244 /wd4267 /wd4244 /wd4309 /wd4005 /MP /DNOMINMAX) endif() add_library(dxil-utils STATIC util/thread_local_allocator.hpp util/thread_local_allocator.cpp) target_include_directories(dxil-utils PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/util) target_compile_options(dxil-utils PRIVATE ${DXIL_SPV_CXX_FLAGS}) set_target_properties(dxil-utils PROPERTIES POSITION_INDEPENDENT_CODE ON) add_subdirectory(third_party EXCLUDE_FROM_ALL) add_subdirectory(bc EXCLUDE_FROM_ALL) add_subdirectory(external EXCLUDE_FROM_ALL) add_library(spirv-module STATIC ir.hpp descriptor_qa.cpp descriptor_qa.hpp spirv_module.hpp spirv_module.cpp spirv_module_instrumentation.hpp spirv_module_instrumentation.cpp) set_target_properties(spirv-module PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(spirv-module PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(spirv-module PUBLIC glslang-spirv-builder dxil-spirv-headers) target_link_libraries(spirv-module PRIVATE dxil-utils dxil-debug) target_compile_options(spirv-module PRIVATE ${DXIL_SPV_CXX_FLAGS}) add_library(dxil-converter STATIC memory_stream.hpp memory_stream.cpp llvm_bitcode_parser.hpp llvm_bitcode_parser.cpp dxil.hpp dxil_converter.hpp dxil_converter.cpp cfg_structurizer.hpp cfg_structurizer.cpp node_pool.hpp node_pool.cpp node.hpp node.cpp dxil_parser.hpp dxil_parser.cpp scratch_pool.hpp opcodes/converter_impl.hpp opcodes/opcodes.hpp opcodes/dxil/dxil_common.hpp opcodes/dxil/dxil_common.cpp opcodes/dxil/dxil_resources.hpp opcodes/dxil/dxil_resources.cpp opcodes/dxil/dxil_compute.hpp opcodes/dxil/dxil_compute.cpp opcodes/dxil/dxil_arithmetic.hpp opcodes/dxil/dxil_arithmetic.cpp opcodes/dxil/dxil_pixel_ops.hpp opcodes/dxil/dxil_pixel_ops.cpp opcodes/dxil/dxil_geometry.hpp opcodes/dxil/dxil_geometry.cpp opcodes/dxil/dxil_tessellation.hpp opcodes/dxil/dxil_tessellation.cpp opcodes/dxil/dxil_waveops.hpp opcodes/dxil/dxil_waveops.cpp opcodes/dxil/dxil_sampling.hpp opcodes/dxil/dxil_sampling.cpp opcodes/dxil/dxil_buffer.hpp opcodes/dxil/dxil_buffer.cpp opcodes/dxil/dxil_ray_tracing.hpp opcodes/dxil/dxil_ray_tracing.cpp opcodes/dxil/dxil_mesh.hpp opcodes/dxil/dxil_mesh.cpp opcodes/dxil/dxil_workgraph.hpp opcodes/dxil/dxil_workgraph.cpp opcodes/dxil/dxil_ags.hpp opcodes/dxil/dxil_ags.cpp 
opcodes/dxil/dxil_nvapi.hpp opcodes/dxil/dxil_nvapi.cpp opcodes/opcodes_llvm_builtins.hpp opcodes/opcodes_llvm_builtins.cpp opcodes/opcodes_dxil_builtins.hpp opcodes/opcodes_dxil_builtins.cpp) set_target_properties(dxil-converter PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(dxil-converter PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_compile_options(dxil-converter PRIVATE ${DXIL_SPV_CXX_FLAGS}) target_link_libraries(dxil-converter PRIVATE dxil-debug external::llvm dxil-utils) target_link_libraries(dxil-converter PUBLIC spirv-module) add_library(dxil-spirv-c-shared SHARED dxil_spirv_c.h dxil_spirv_c.cpp) target_include_directories(dxil-spirv-c-shared PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/dxil-spirv>) target_link_libraries(dxil-spirv-c-shared PRIVATE dxil-debug dxil-converter external::llvm dxil-utils) target_compile_options(dxil-spirv-c-shared PRIVATE ${DXIL_SPV_CXX_FLAGS}) target_compile_definitions(dxil-spirv-c-shared PRIVATE DXIL_SPV_EXPORT_SYMBOLS) set_target_properties(dxil-spirv-c-shared PROPERTIES PUBLIC_HEADER dxil_spirv_c.h) if (WIN32 AND CMAKE_COMPILER_IS_GNUCXX) target_link_libraries(dxil-spirv-c-shared PRIVATE -static gcc stdc++ winpthread) endif() # If we're linking in full LLVM statically, ensure we don't export all LLVM symbols. if (NOT MSVC AND DXIL_SPIRV_NATIVE_LLVM) set_target_properties(dxil-spirv-c-shared PROPERTIES LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/link.T") endif() add_library(dxil-spirv-c-static STATIC dxil_spirv_c.h dxil_spirv_c.cpp) target_include_directories(dxil-spirv-c-static PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/dxil-spirv>) target_link_libraries(dxil-spirv-c-static PRIVATE dxil-debug dxil-converter external::llvm dxil-utils) target_compile_options(dxil-spirv-c-static PRIVATE ${DXIL_SPV_CXX_FLAGS}) set_target_properties(dxil-spirv-c-static PROPERTIES PUBLIC_HEADER dxil_spirv_c.h) set_target_properties(dxil-spirv-c-static PROPERTIES POSITION_INDEPENDENT_CODE ON) if (DXIL_SPIRV_CLI) add_library(cli-parser STATIC third_party/cli_parser/cli_parser.hpp third_party/cli_parser/cli_parser.cpp) target_include_directories(cli-parser PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/third_party/cli_parser) target_link_libraries(cli-parser PUBLIC dxil-debug) target_compile_options(cli-parser PRIVATE ${DXIL_SPV_CXX_FLAGS}) add_executable(dxil-spirv dxil_spirv.cpp) add_executable(dxil-extract dxil_extract.cpp) target_link_libraries(dxil-spirv PRIVATE dxil-spirv-c-shared cli-parser SPIRV-Tools-static spirv-cross-c dxil-debug) target_compile_options(dxil-spirv PRIVATE ${DXIL_SPV_CXX_FLAGS}) target_link_libraries(dxil-extract PRIVATE dxil-spirv-c-shared cli-parser external::llvm) target_compile_options(dxil-extract PRIVATE ${DXIL_SPV_CXX_FLAGS}) if (WIN32 AND CMAKE_COMPILER_IS_GNUCXX) target_link_libraries(dxil-spirv PRIVATE -static gcc stdc++ winpthread) target_link_libraries(dxil-extract PRIVATE -static gcc stdc++ winpthread) endif() if (NOT DXIL_SPIRV_NATIVE_LLVM) add_executable(dxbc-spirv-sandbox dxbc_spirv_sandbox.cpp) target_link_libraries(dxbc-spirv-sandbox PRIVATE dxil-utils dxil-debug llvm-bc dxil-converter dxbc-spirv dxbc-spirv-test spirv-cross-c SPIRV-Tools-static) endif() endif() set(DXIL_SPV_VERSION_MAJOR 2) set(DXIL_SPV_VERSION_MINOR 66) set(DXIL_SPV_VERSION_PATCH 0) set(DXIL_SPV_VERSION ${DXIL_SPV_VERSION_MAJOR}.${DXIL_SPV_VERSION_MINOR}.${DXIL_SPV_VERSION_PATCH}) set_target_properties(dxil-spirv-c-shared PROPERTIES VERSION ${DXIL_SPV_VERSION} SOVERSION ${DXIL_SPV_VERSION_MAJOR}) set(DXIL_SPV_INSTALL_LIB_DIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}) set(DXIL_SPV_INSTALL_INC_DIR
${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/dxil-spirv) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/pkg-config/dxil-spirv-c-shared.pc.in ${CMAKE_CURRENT_BINARY_DIR}/dxil-spirv-c-shared.pc @ONLY) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/dxil-spirv-c-shared.pc DESTINATION ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_DATAROOTDIR}/pkgconfig) if (DXIL_SPIRV_CLI) install(TARGETS dxil-spirv RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) install(TARGETS dxil-extract RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) endif() install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/dxil_spirv_c.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/dxil-spirv) install(TARGETS dxil-spirv-c-shared EXPORT dxil_spirv_c_sharedConfig RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/dxil-spirv) install(EXPORT dxil_spirv_c_sharedConfig DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/dxil_spirv_c_shared/cmake) option(DXIL_SPV_MISC_CLI "Enable misc CLI apps." OFF) if (DXIL_SPV_MISC_CLI) add_executable(structurize-test misc/structurize_test.cpp) target_link_libraries(structurize-test PRIVATE dxil-converter SPIRV-Tools-static spirv-cross-c dxil-debug dxil-utils) target_compile_options(structurize-test PRIVATE ${DXIL_SPV_CXX_FLAGS}) endif() ================================================ FILE: DESCRIPTORS.md ================================================ # Full SM 6.0+ descriptor compatibility in Vulkan In this document, I aim to rethink how we implement descriptors. The goal is to implement descriptors efficiently even in bindless scenarios, i.e. SM 5.1/6.0+. ## Descriptor heaps in D3D12 The API lets you allocate a descriptor heap with N elements. Each element can be an SRV, UAV or CBV, i.e. any arbitrary type. The API exposes some kind of "stride" here, which implies that max(sizeof(SRV), sizeof(UAV), sizeof(CBV)) is some fixed value, e.g. 32 bytes on the drivers I've tested. ## Root signatures Here we can specify up to 64 DWORDs (256 bytes) which get passed to the shader. - Descriptor table pointer: 1 DWORD, (not two? interesting ...) - Root descriptor (UAV/CBV), apparently not bounds checked? (2 DWORDs) - Root constants (1 DWORD each) The first thought that comes to mind is that a descriptor table pointer could correlate to a descriptor set, but this will not work. We only have 8 descriptor sets available. (4 is technically min-spec, but only some mobile chips expose that. I think it's fair to rely on 8.) ### Descriptor table pointer as an offset A weird design choice in D3D12 is SetDescriptorHeaps, which lets you bind only two heaps, one SRV/CBV/UAV heap and one SAMPLER heap, and all descriptor table pointers must refer to one of these. Given that a descriptor table pointer only takes 1 DWORD, this starts making sense. We should just encode offsets into the two heaps here. SetDescriptorHeaps now becomes vkCmdBindDescriptorSets directly. The root signature becomes push constants (on implementations with 256 bytes of push constant space), or spills into a versioned uniform buffer (on 128 byte implementations with a root signature larger than 128 bytes, which should be rare). ### Descriptor types While descriptor types are mostly irrelevant inside D3D12 heaps, we have very particular types in Vulkan: `SAMPLED_IMAGE`, `UNIFORM_TEXEL_BUFFER`, `STORAGE_TEXEL_BUFFER`, `UNIFORM_BUFFER` and friends. Ideally, we'd have a "GENERAL" descriptor type which could be anything, and we'd save on a lot of bloat in this scenario.
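Before moving on, here is a minimal C++ sketch of the root parameter packing described in the previous section; `PackedRootParameters` and its members are hypothetical names for illustration, not the actual dxil-spirv or vkd3d implementation.

```
#include <cstdint>
#include <cstring>

// Hypothetical packed root parameter block mirroring the layout described above:
// a descriptor table pointer shrinks to a single DWORD (an offset into the bound
// SRV/CBV/UAV or SAMPLER heap), root constants take 1 DWORD each, and root
// descriptors keep a full 64-bit GPU VA (2 DWORDs).
struct PackedRootParameters
{
    uint32_t dwords[64] = {}; // D3D12 caps the root signature at 64 DWORDs (256 bytes).

    void set_descriptor_table(uint32_t dword_offset, uint32_t heap_offset)
    {
        dwords[dword_offset] = heap_offset; // 1 DWORD, an offset rather than a GPU VA.
    }

    void set_root_constant(uint32_t dword_offset, uint32_t value)
    {
        dwords[dword_offset] = value; // 1 DWORD each.
    }

    void set_root_descriptor(uint32_t dword_offset, uint64_t gpu_va)
    {
        // 2 DWORDs; root descriptors keep the raw VA since they are not bounds checked.
        std::memcpy(&dwords[dword_offset], &gpu_va, sizeof(gpu_va));
    }
};
```

If the packed block fits within the push constant budget, it maps directly to vkCmdPushConstants; otherwise it spills into the versioned uniform buffer mentioned above.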
With a descriptor heap, we do not know the root signature yet, so we have two choices for how to allocate the descriptors: with `VARIABLE_COUNT` or without. With `VARIABLE_COUNT` we can declare descriptor set layouts which have the maximum number of bindings we expect to support (at least 1M according to Tier 2), and then, when allocating the descriptor pools, allocate just the right number of descriptors. This seems like the correct approach. Two effects follow from this: each descriptor type must live in its own descriptor set, since only one descriptor binding per set can have a variable count, and we thus end up with 6 descriptor set layouts which will be common across all pipelines. Each set contains one binding with a variable-sized array of that type. - Set0: `SAMPLED_IMAGE` - Texture - Set1: `UNIFORM_TEXEL_BUFFER` - TypedBuffer, StructuredBuffer, ByteAddressBuffer - Set2: `STORAGE_TEXEL_BUFFER` - RWTypedBuffer, RWStructuredBuffer, RWByteAddressBuffer. Descriptors come in pairs; the odd indices deal with UAV counters. - Set3: `STORAGE_IMAGE` - Set4: `UNIFORM_BUFFER` - Set5: `SAMPLER` This leaves three sets which can be derived from a root signature directly. These include: - Set6: Immutable samplers. It's useful to keep these in their own set since we don't have to deal with the push descriptor restriction of having to push immutable samplers (and thus having to keep track of them as well). - Set4 (reuse `UNIFORM_BUFFER` set): Virtualized bindings. These are bindings we have to repack from descriptor heaps to support implementations with few CBVs (like older Nvidia cards). Due to RS 1.0 volatile descriptor behavior (the descriptor only needs to be valid on the GPU timeline and can change at any time) we need to defer the actual vkUpdateDescriptorSets calls to QueueSubmit() time. Using a descriptor update template here would be nice! Virtualized bindings are currently the *only* path used by vkd3d, and that path crumbles for any interesting use of SM 5.1 and up, i.e. larger arrays of resources. If we use virtualized descriptors, we can pilfer the set used for uniform buffers, as that's the descriptor type we're going to virtualize anyway. - Set7 (reuse `UNIFORM_BUFFER` set): Versioned push descriptor set. Here we can place: - Root constants which spill outside maxPushConstantsSize - Root descriptors For implementations which don't support push descriptors, we can fall back to a versioned descriptor set instead, just like vkd3d does. #### Why not `STORAGE_BUFFER` for buffer UAVs? Alignment is a big issue for SSBOs, especially on Nvidia. StructuredBuffers can be bound at very awkward alignments, and only a `STORAGE_TEXEL_BUFFER` of R32UI can express those. This might lead us into an awkward path when dealing with 16-bit load/store in SM 6.2. Using physical storage buffers (PSB) for untyped buffers would be great, but we need to consider out-of-bounds behavior, which PSB does not support. Also, if we go the PSB route, we will have another indirection to consider, since rather than: - Load UAV descriptor - Load/Store data we end up with: - Load CBV/UAV descriptor - Load PSB pointer - Load/store data ## Sample shader ``` layout(push_constant) uniform RootConstants { uint descriptor_table_offset0; uint descriptor_table_offset1; uint descriptor_table_offset2; uint descriptor_table_offset3; uint root_constant0; uint root_constant1; uint root_constant2; uint root_constant3; } root; // We can alias descriptors.
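// Aliasing is legal in Vulkan: several declarations may share one (set, binding)
// pair as long as each access goes through the declaration whose type matches the
// descriptor actually written to that heap slot.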
layout(set = 0, binding = 0) uniform texture2D Tex2D[]; layout(set = 0, binding = 0) uniform texture3D Tex3D[]; layout(set = 1, binding = 0) uniform textureBuffer TypedBuffers[]; layout(set = 1, binding = 0) uniform textureBuffer StructuredBuffers[]; layout(set = 1, binding = 0) uniform textureBuffer ByteAddressBuffers[]; layout(set = 2, binding = 0) uniform imageBuffer RWTypedBuffers[]; layout(set = 2, binding = 1, r32ui) uniform uimageBuffer RWStructuredBuffers[]; layout(set = 2, binding = 2, r32ui) uniform uimageBuffer RWByteAddressBuffers[]; layout(set = 3, binding = 0) uniform image2D RWTex2D[]; layout(set = 3, binding = 0) uniform image3D RWTex3D[]; #if SUPPORTS_MANY_CBVS layout(set = 4, binding = 0, std140) uniform UBOs { vec4 data[MAX_SIZE]; } CBV[]; #else // Versioned descriptors. layout(set = 4, binding = 0, std140) uniform UBO0 { vec4 data[MAX_SIZE]; } ubo0; layout(set = 4, binding = 1, std140) uniform UBO1 { vec4 data[MAX_SIZE]; } ubo1; #endif layout(set = 5, binding = 0) uniform sampler DynamicSamplers[]; layout(set = 6, binding = 0) uniform sampler ImmutableSampler0; layout(set = 6, binding = 1) uniform sampler ImmutableSampler1; layout(set = 6, binding = 2) uniform sampler ImmutableSampler2; // Root descriptors. layout(set = 7, binding = 0) uniform RootCBV0 { vec4 data[MAX_SIZE]; } root_cbv0; layout(set = 7, binding = 1) uniform RootCBV1 { vec4 data[MAX_SIZE]; } root_cbv1; void main() { const uint OffsetIntoRootTable = 42; // This is deduced from D3D12_DESCRIPTOR_RANGE. // descriptor_table_offset is an offset into the heap bound by SetDescriptorHeaps, which we can find by comparing // SetGraphicsRootDescriptorTable against SetDescriptorHeaps. // Tack on nonuniformEXT as required by the IL. texelFetch(Tex2D[OffsetIntoRootTable + root.descriptor_table_offset1], ivec2(0), 0); } ``` ================================================ FILE: LICENSE.MIT ================================================ Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation SPDX-License-Identifier: MIT Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
For third_party/bc-decoder: /****************************************************************************** * The MIT License (MIT) * * Copyright (c) 2019-2020 Baldur Karlsson * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. ******************************************************************************/ For third_party/glslang-spirv (glslang): -------------------------------------------------------------------------------- The MIT License -------------------------------------------------------------------------------- Copyright 2020 The Khronos Group Inc Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # dxil-spirv This project provides translation of DXIL (SM 6.x) shaders to SPIR-V for use in the vkd3d project, which implements D3D12 on top of Vulkan. Using [dxbc-spirv](https://github.com/doitsujin/dxbc-spirv) it also handles legacy DXBC shaders. ## Building ### Dependencies Check out submodules first with `git submodule update --init --recursive`. No external dependencies apart from the submodules are required to build. This project implements a "small" LLVM C++ API subset which acts as a drop-in replacement for the full LLVM. It is possible to build against the true LLVM C++ API if LLVM is checked out in `external/llvm` and the `-DDXIL_SPIRV_NATIVE_LLVM=ON` CMake option is used. See the `checkout_llvm.sh` script. ### Build Standard CMake build. ```shell mkdir build cd build cmake .. -DCMAKE_BUILD_TYPE=Release cmake --build .
### Build

Standard CMake build.

```shell
mkdir build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release
cmake --build . --config Release
```

## Linking against dxil-spirv

Only the C API is installed, and it is expected to be kept ABI/API stable once it is released.

### pkg-config

```shell
pkg-config dxil-spirv-c-shared --cflags --libs
```

### CMake module

Something like:

```
find_package(dxil_spirv_c_shared)
if (dxil_spirv_c_shared_FOUND)
    message("Found dxil-spirv! Enabling DXIL support.")
    target_link_libraries(vkd3d-shader PRIVATE dxil-spirv-c-shared)
    target_compile_definitions(vkd3d-shader PRIVATE HAVE_DXIL_SPV)
    target_sources(vkd3d-shader PRIVATE vkd3d/libs/vkd3d-shader/dxil.c)
else()
    message("Did not find dxil-spirv :( Disabling DXIL support.")
endif()
```

## Testing

The primary method of testing dxil-spirv and avoiding regressions is a reference shader suite.

### Build DXC

First, build DXC. To keep output consistent, we must use a fixed version of DXC.
Currently this only works on Linux; the Windows build of DXC does not seem to support CMake properly.

```shell
./checkout_dxc.sh
./build_dxc.sh
```

The test suite accepts an arbitrary path to DXC, so if you have a standalone binary somewhere, that can work as well.

### Run test suite

When adding new tests, place the HLSL test in `shaders/` somewhere and run:

```shell
./test_shaders.py shaders --dxc external/dxc-build/bin/dxc --dxil-spirv cmake-build-debug/dxil-spirv
```

If there is any mismatch, the test script will complain.
If there are legitimate changes to be made, add `--update` to the command.
The updated files should then be committed alongside the dxil-spirv change.
`--parallel` can (and should) be used to speed up the process.

To update DXBC references, run:

```shell
./cmake-build-debug/dxbc-spirv-sandbox ./reference-dxbc
```

### Running large repro suites

For internal development, we also have extensive repro suites which cover real-world content.
These cannot be made public for obvious reasons, so the intent is that symlinks are set up during development.
Shaders can be dumped with `VKD3D_SHADER_DUMP_PATH`.

```shell
# The scripts might not work properly if the paths aren't laid out like this.
ln -s ${DXIL_SPIRV_REPO}/shaders shaders-dxil
ln -s ${DXBC_SPIRV_REPO}/shaders shaders-dxbc
ln -s ${DXIL_SPIRV_REPO}/reference/shaders reference/shaders-dxil
ln -s ${DXBC_SPIRV_REPO}/reference/shaders reference/shaders-dxbc
./test_shaders.py shaders-dxil --dxil-spirv cmake-build-release/dxil-spirv --parallel --update
./test_shaders.py shaders-dxbc --dxil-spirv cmake-build-release/dxil-spirv --parallel --update
```

To import shaders into the suite:

```shell
mkdir shaders-dxil/dxilgame
mkdir shaders-dxbc/dxbcgame
# For DXIL
./copy_reference_shaders.py --dxil /tmp/path/to/vkd3d-shader-dump-path --raw --output shaders-dxil/dxilgame
# For DXBC
./copy_reference_shaders.py --dxbc /tmp/path/to/vkd3d-shader-dump-path --raw --output shaders-dxbc/dxbcgame
```

To run only an isolated subfolder, use `--subfolder`.

## License

dxil-spirv is currently licensed as MIT. See LICENSE.MIT for more details.
```c
/* Copyright (c) 2019-2025 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
```

================================================
FILE: bc/CMakeLists.txt
================================================

add_library(llvm-bc STATIC
    cast.hpp
    iterator.hpp
    data_structures.hpp
    value.hpp value.cpp
    instruction.hpp instruction.cpp
    function.hpp function.cpp
    context.hpp context.cpp
    type.hpp type.cpp
    module.hpp module.cpp
    module_dxbc_ir.cpp
    metadata.hpp metadata.cpp
    disassembler.cpp)
target_compile_options(llvm-bc PRIVATE ${DXIL_SPV_CXX_FLAGS})
target_compile_definitions(llvm-bc PUBLIC HAVE_LLVMBC)
target_link_libraries(llvm-bc PRIVATE bc-decoder dxil-debug dxil-utils)
target_include_directories(llvm-bc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
set_target_properties(llvm-bc PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(llvm-bc PRIVATE dxbc-spirv)
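# Illustrative note (not part of the original file): since HAVE_LLVMBC and the
# include directory are declared PUBLIC above, a consumer target inherits both
# automatically. A hypothetical tool would only need:
#
#   target_link_libraries(my-tool PRIVATE llvm-bc)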
================================================
FILE: bc/cast.hpp
================================================

/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include "logging.hpp"
#include "metadata.hpp"
#include "type.hpp"
#include "value.hpp"
#include <exception> // for std::terminate

namespace LLVMBC
{
struct ModuleParseContext;

template <typename T>
inline T *cast(Type *type)
{
    if (type->getTypeID() != T::get_type_id())
    {
        LOGE("Invalid type ID in cast.\n");
        std::terminate();
    }
    return static_cast<T *>(type);
}

template <typename T>
inline const T *cast(const Type *type)
{
    if (type->getTypeID() != T::get_type_id())
    {
        LOGE("Invalid type ID in cast.\n");
        std::terminate();
    }
    return static_cast<const T *>(type);
}

template <typename T>
inline T *dyn_cast(Type *type)
{
    if (!type)
        return nullptr;
    if (type->getTypeID() != T::get_type_id())
        return nullptr;
    else
        return static_cast<T *>(type);
}

template <typename T>
inline const T *dyn_cast(const Type *type)
{
    if (!type)
        return nullptr;
    if (type->getTypeID() != T::get_type_id())
        return nullptr;
    else
        return static_cast<const T *>(type);
}

template <typename T>
inline bool isa(const Type *type)
{
    return type->getTypeID() == T::get_type_id();
}

class ValueProxy : public Value
{
public:
    static constexpr ValueKind get_value_kind()
    {
        return ValueKind::Proxy;
    }

    ValueProxy(Type *type, ModuleParseContext &context, uint64_t id);

    Value *get_proxy_value() const;
    bool resolve();

    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    uint64_t id;
    ModuleParseContext &context;
    Value *proxy = nullptr;
};

namespace Internal
{
inline Value *resolve_proxy(Value *value);
inline const Value *resolve_proxy(const Value *value);
} // namespace Internal

template <typename T>
inline T *cast(Value *value)
{
    if (T::get_value_kind() != ValueKind::Proxy)
        value = Internal::resolve_proxy(value);
    if (T::is_base_of_value_kind(value->get_value_kind()))
        return static_cast<T *>(value);
    else
    {
        LOGE("Invalid type ID in cast.\n");
        std::terminate();
    }
}

template <typename T>
inline const T *cast(const Value *value)
{
    if (T::get_value_kind() != ValueKind::Proxy)
        value = Internal::resolve_proxy(value);
    if (T::is_base_of_value_kind(value->get_value_kind()))
        return static_cast<const T *>(value);
    else
    {
        LOGE("Invalid type ID in cast.\n");
        std::terminate();
    }
}

template <typename T>
inline T *dyn_cast(Value *value)
{
    if (!value)
        return nullptr;
    if (T::get_value_kind() != ValueKind::Proxy)
        value = Internal::resolve_proxy(value);
    if (T::is_base_of_value_kind(value->get_value_kind()))
        return static_cast<T *>(value);
    else
        return nullptr;
}

template <typename T>
inline const T *dyn_cast(const Value *value)
{
    if (!value)
        return nullptr;
    if (T::get_value_kind() != ValueKind::Proxy)
        value = Internal::resolve_proxy(value);
    if (T::is_base_of_value_kind(value->get_value_kind()))
        return static_cast<const T *>(value);
    else
        return nullptr;
}

template <typename T>
inline bool isa(const Value *value)
{
    if (T::get_value_kind() != ValueKind::Proxy)
        value = Internal::resolve_proxy(value);
    return T::is_base_of_value_kind(value->get_value_kind());
}

namespace Internal
{
inline Value *resolve_proxy(Value *value)
{
    while (value && value->get_value_kind() == ValueKind::Proxy)
        value = cast<ValueProxy>(value)->get_proxy_value();
    return value;
}

inline const Value *resolve_proxy(const Value *value)
{
    while (value && value->get_value_kind() == ValueKind::Proxy)
        value = cast<ValueProxy>(value)->get_proxy_value();
    return value;
}
} // namespace Internal
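// Usage sketch (illustrative, not part of the upstream header): these helpers
// mirror LLVM's casting idioms. Given some Type *t:
//
//     if (isa<IntegerType>(t))
//         unsigned bits = cast<IntegerType>(t)->getBitWidth();
//     if (auto *ptr_type = dyn_cast<PointerType>(t))
//         t = ptr_type->getElementType();
//
// cast<T> terminates on a kind mismatch, while dyn_cast<T> returns nullptr.
// For Value, both first look through ValueProxy forward references via
// Internal::resolve_proxy(), unless T is ValueProxy itself.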
template <typename T>
inline T *cast(MDOperand &md)
{
    if (md.get_metadata_kind() == T::get_metadata_kind())
        return static_cast<T *>(&md);
    else
    {
        LOGE("Invalid type ID in cast.\n");
        std::terminate();
    }
}

template <typename T>
inline T *cast(MDOperand *md)
{
    if (md->get_metadata_kind() == T::get_metadata_kind())
        return static_cast<T *>(md);
    else
    {
        LOGE("Invalid type ID in cast.\n");
        std::terminate();
    }
}

template <typename T>
inline const T *cast(const MDOperand &md)
{
    if (md.get_metadata_kind() == T::get_metadata_kind())
        return static_cast<const T *>(&md);
    else
    {
        LOGE("Invalid type ID in cast.\n");
        std::terminate();
    }
}

template <typename T>
inline const T *cast(const MDOperand *md)
{
    if (md->get_metadata_kind() == T::get_metadata_kind())
        return static_cast<const T *>(md);
    else
    {
        LOGE("Invalid type ID in cast.\n");
        std::terminate();
    }
}

template <typename T>
inline T *dyn_cast(MDOperand &md)
{
    if (md.get_metadata_kind() == T::get_metadata_kind())
        return static_cast<T *>(&md);
    else
        return nullptr;
}

template <typename T>
inline T *dyn_cast(MDOperand *md)
{
    if (!md)
        return nullptr;
    if (md->get_metadata_kind() == T::get_metadata_kind())
        return static_cast<T *>(md);
    else
        return nullptr;
}

template <typename T>
inline const T *dyn_cast(const MDOperand &md)
{
    if (md.get_metadata_kind() == T::get_metadata_kind())
        return static_cast<const T *>(&md);
    else
        return nullptr;
}

template <typename T>
inline const T *dyn_cast(const MDOperand *md)
{
    if (!md)
        return nullptr;
    if (md->get_metadata_kind() == T::get_metadata_kind())
        return static_cast<const T *>(md);
    else
        return nullptr;
}

template <typename T>
inline bool isa(const MDOperand &md)
{
    return md.get_metadata_kind() == T::get_metadata_kind();
}

template <typename T>
inline bool isa(const MDOperand *md)
{
    return md->get_metadata_kind() == T::get_metadata_kind();
}
} // namespace LLVMBC
================================================
FILE: bc/context.cpp
================================================

/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "context.hpp"
#include <stdint.h> // for uintptr_t

namespace LLVMBC
{
LLVMContext::LLVMContext()
{
}

LLVMContext::~LLVMContext()
{
    for (size_t i = typed_allocations.size(); i; i--)
        typed_allocations[i - 1]->run();
    for (size_t i = raw_allocations.size(); i; i--)
        dxil_spv::free_in_thread(raw_allocations[i - 1]);
}

void *LLVMContext::allocate_from_chain(uintptr_t size, uintptr_t align)
{
    current_block = (current_block + align - 1) & ~(align - 1);
    if (current_block + size <= current_block_end)
    {
        void *ret = reinterpret_cast<void *>(current_block);
        current_block += size;
        return ret;
    }
    else
    {
        current_block = 0;
        current_block_end = 0;
        return nullptr;
    }
}

void LLVMContext::allocate_new_chain(size_t size, size_t align)
{
    size_t min_size = size + align;
    if (min_size < 64 * 1024)
        min_size = 64 * 1024;

    void *ptr = dxil_spv::allocate_in_thread(min_size);
    if (ptr)
    {
        raw_allocations.push_back(ptr);
        current_block = reinterpret_cast<uintptr_t>(ptr);
        current_block_end = current_block + min_size;
    }
    else
    {
        current_block = 0;
        current_block_end = 0;
    }
}

void *LLVMContext::allocate(size_t size, size_t align)
{
    void *ptr = allocate_from_chain(size, align);
    if (!ptr)
    {
        allocate_new_chain(size, align);
        ptr = allocate_from_chain(size, align);
    }
    return ptr;
}
} // namespace LLVMBC
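// Illustrative note (not part of the upstream file): allocate() implements a
// simple bump allocator over chained blocks. allocate_from_chain() first
// aligns the cursor up:
//
//     current_block = (current_block + align - 1) & ~(align - 1);
//     // e.g. a cursor at 0x1003 with align 8 becomes 0x1008
//
// and only returns memory if `size` bytes still fit before current_block_end.
// Otherwise allocate() falls back to allocate_new_chain(), which requests at
// least max(size + align, 64 * 1024) bytes so the retry cannot fail for
// alignment reasons. Individual allocations are never freed; every block is
// released at once in ~LLVMContext(), after running registered destructors.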
================================================
FILE: bc/context.hpp
================================================

/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include "data_structures.hpp"
#include <exception>
#include <new>
#include <type_traits>
#include <utility>

namespace LLVMBC
{
class Type;

class LLVMContext
{
public:
    LLVMContext();
    ~LLVMContext();
    void operator=(const LLVMContext &) = delete;
    LLVMContext(const LLVMContext &) = delete;

    template <typename T, typename... U>
    T *construct(U &&... u)
    {
        T *mem = static_cast<T *>(allocate(sizeof(T), alignof(T)));
        if (!mem)
            std::terminate();
        T *t = new (mem) T(std::forward<U>(u)...);
        if (!std::is_trivially_destructible<T>::value)
            append_typed_destructor(t);
        return t;
    }

    template <typename T, typename... U>
    T *construct_n(size_t n, const U &... u)
    {
        T *mem = static_cast<T *>(allocate(sizeof(T) * n, alignof(T)));
        if (!mem)
            std::terminate();
        for (size_t i = 0; i < n; i++)
        {
            T *tmp = new (&mem[i]) T(u...);
            if (!std::is_trivially_destructible<T>::value)
                append_typed_destructor(tmp);
        }
        return mem;
    }

    Vector<Type *> &get_type_cache()
    {
        return type_cache;
    }

private:
    void *allocate(size_t size, size_t align);

    struct Deleter
    {
        virtual ~Deleter() = default;
        virtual void run() = 0;
    };

    template <typename T>
    struct TypedDeleter : Deleter
    {
        explicit TypedDeleter(T *ptr_)
            : ptr(ptr_)
        {
        }

        void run() override
        {
            ptr->~T();
        }
        T *ptr;
    };

    uintptr_t current_block = 0;
    uintptr_t current_block_end = 0;
    void *allocate_from_chain(uintptr_t size, uintptr_t align);
    void allocate_new_chain(size_t size, size_t align);
    Vector<void *> raw_allocations;
    Vector<Deleter *> typed_allocations;
    Vector<Type *> type_cache;

    template <typename T, typename... U>
    T *construct_trivial(U &&... u)
    {
        T *mem = static_cast<T *>(allocate(sizeof(T), alignof(T)));
        if (!mem)
            std::terminate();
        T *t = new (mem) T(std::forward<U>(u)...);
        return t;
    }

    template <typename T>
    void append_typed_destructor(T *ptr)
    {
        typed_allocations.push_back(construct_trivial<TypedDeleter<T>>(ptr));
    }
};
} // namespace LLVMBC

================================================
FILE: bc/data_structures.hpp
================================================

/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include "thread_local_allocator.hpp"

namespace LLVMBC
{
template <typename T>
using Vector = dxil_spv::Vector<T>;
template <typename T>
using UnorderedSet = dxil_spv::UnorderedSet<T>;
template <typename K, typename V>
using UnorderedMap = dxil_spv::UnorderedMap<K, V>;
using String = dxil_spv::String;
using StringStream = dxil_spv::StringStream;
}

================================================
FILE: bc/disassembler.cpp
================================================

/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "cast.hpp" #include "context.hpp" #include "function.hpp" #include "instruction.hpp" #include "metadata.hpp" #include "module.hpp" #include "type.hpp" #include "value.hpp" #include #include #include namespace LLVMBC { struct StreamState { StringStream stream; unsigned indent = 0; void append(Type *type); void append(IntegerType *type); void append(PointerType *type); void append(ArrayType *type); void append(StructType *type); void append(FunctionType *type); void append(VectorType *type); void append(const String &str); void append(Value *value, bool decl = false); void append(GlobalVariable *value, bool decl = false); void append(Instruction *value); void append(Argument *value, bool decl = false); void append(ShuffleVectorInst *shuf, bool decl = false); void append(ExtractElementInst *extr, bool decl = false); void append(InsertElementInst *inst, bool decl = false); void append(Function *value, bool decl = false); void append(BinaryOperator *value, bool decl = false); void append(UnaryOperator *uop, bool decl = false); void append(CallInst *value, bool decl = false); void append(BranchInst *value, bool decl = false); void append(SwitchInst *branch, bool decl = false); void append(ReturnInst *value, bool decl = false); void append(UndefValue *value, bool decl = false); void append(Constant *value, bool decl = false); void append(ConstantInt *value, bool decl = false); void append(ConstantFP *value, bool decl = false); void append(BasicBlock *bb, bool decl = false); void append(FCmpInst *value, bool decl = false); void append(ICmpInst *value, bool decl = false); void append(PHINode *value, bool decl = false); void append(CastInst *value, bool decl = false); void append(SelectInst *value, bool decl = false); void append(ExtractValueInst *value, bool decl = false); void append(AllocaInst *value, bool decl = false); void append(GetElementPtrInst *value, bool decl = false); void append(LoadInst *value, bool decl = false); void append(StoreInst *value, bool decl = false); void append(AtomicRMWInst *value, bool decl = false); void append(AtomicCmpXchgInst *xchg, bool decl = false); void append(ConstantAggregate *agg, bool decl = false); void append(ConstantAggregateZero *zero, bool decl = false); void append(ConstantDataArray *data, bool decl = false); void append(ConstantDataVector *vec, bool decl = false); void append(ConstantExpr *expr, bool decl = false); void append(MDOperand *md); void append(NamedMDNode *md); void append(MDNode *md, bool decl = false); void append(float v); void append(double v); void append(bool v); void append(const char *str); void newline(); void newline_noindent(); void begin_scope(); void end_scope(); template void append(T &&t, Ts &&... ts) { append(std::forward(t)); append(std::forward(ts)...); } // Only want this overload to trigger on various integer types. template typename std::enable_if::value, void>::type append(T value) { stream << value; } // Need this to avoid the generic template to be deduced. 
template void append(char (&str)[N]) { return append(static_cast(str)); } }; void StreamState::append(IntegerType *type) { append("i", type->getBitWidth()); } void StreamState::append(StructType *type) { append("{ "); for (unsigned i = 0; i < type->getNumElements(); i++) { append(type->getElementType(i)); if (i + 1 < type->getNumElements()) append(", "); } append(" }"); } void StreamState::append(PointerType *type) { if (type->getAddressSpace() != 0) append(type->getElementType(), " addrspace(", type->getAddressSpace(), ")*"); else append(type->getElementType(), "*"); } void StreamState::append(ArrayType *type) { append("[", type->getArrayNumElements(), " x ", type->getArrayElementType(), "]"); } void StreamState::append(FunctionType *type) { append("(", type->getReturnType(), " (*) ("); for (unsigned i = 0; i < type->getNumParams(); i++) { append(type->getParamType(i)); if (i + 1 < type->getNumParams()) append(", "); } append("))"); } void StreamState::append(VectorType *type) { append(type->getElementType(), "x", type->getVectorSize()); } void StreamState::append(bool v) { stream << (v ? "true" : "false"); } void StreamState::append(float v) { char buf[1024]; sprintf(buf, "%e", v); append(buf); } void StreamState::append(double v) { char buf[1024]; sprintf(buf, "%e", v); append(buf); } void StreamState::newline() { stream << "\n"; for (unsigned i = 0; i < indent; i++) stream << " "; } void StreamState::newline_noindent() { stream << "\n"; } void StreamState::append(const char *str) { stream << str; } void StreamState::append(const String &str) { stream << str; } void StreamState::begin_scope() { append(" {"); indent++; } void StreamState::end_scope() { assert(indent > 0); indent--; newline(); append("}"); } void StreamState::append(Type *type) { switch (type->getTypeID()) { case Type::TypeID::IntegerTyID: return append(cast(type)); case Type::TypeID::PointerTyID: return append(cast(type)); case Type::TypeID::StructTyID: return append(cast(type)); case Type::TypeID::ArrayTyID: return append(cast(type)); case Type::TypeID::FunctionTyID: return append(cast(type)); case Type::TypeID::VectorTyID: return append(cast(type)); case Type::TypeID::HalfTyID: return append("half"); case Type::TypeID::FloatTyID: return append("float"); case Type::TypeID::DoubleTyID: return append("double"); case Type::TypeID::Unknown: return append("unknown"); case Type::TypeID::VoidTyID: return append("void"); case Type::TypeID::OpaqueTyID: return append("opaque"); default: break; } LOGE("Unknown Type %u.\n", unsigned(type->getTypeID())); } void StreamState::append(ShuffleVectorInst *shuf, bool decl) { if (decl) { append("%", shuf->get_tween_id(), " = shufflevector ", shuf->getType(), " ", shuf->getOperand(0), ", ", shuf->getOperand(1), " <"); auto *vec_type = cast(shuf->getType()); for (unsigned i = 0; i < vec_type->getVectorSize(); i++) { append(shuf->getMaskValue(i)); if (i + 1 < vec_type->getVectorSize()) append(", "); } append(">"); } else append("%", shuf->get_tween_id()); } void StreamState::append(ExtractElementInst *extr, bool decl) { if (decl) { append("%", extr->get_tween_id(), " = extractelement ", extr->getType(), " ", extr->getVectorOperand(), ", ", extr->getIndexOperand()); } else append("%", extr->get_tween_id()); } void StreamState::append(InsertElementInst *inst, bool decl) { if (decl) { append("%", inst->get_tween_id(), " = insertelement ", inst->getOperand(0), ", ", inst->getOperand(1), ", ", inst->getOperand(2)); } else append("%", inst->get_tween_id()); } void StreamState::append(Argument *arg, 
bool decl) { append("%arg", arg->getArgNo()); } void StreamState::append(Function *func, bool decl) { if (decl) { append("define ", func->getType(), " @", func->getName(), "("); auto *type = func->getFunctionType(); for (unsigned i = 0; i < type->getNumParams(); i++) { append(type->getParamType(i)); if (i + 1 < type->getNumParams()) append(", "); } append(")"); if (func->begin() != func->end()) { begin_scope(); for (auto &bb : *func) append(&bb, true); end_scope(); } } else append("@", func->getName()); } void StreamState::append(GlobalVariable *var, bool decl) { if (decl) { append("@", var->get_tween_id(), " = "); if (cast(var->getType())->getAddressSpace() != 0) append("groupshared "); else append(var->isConstant() ? "constant" : "global", " "); append(var->getType()->getPointerElementType()); if (var->hasInitializer()) append(" ", var->getInitializer()); newline(); } else { append("@", var->get_tween_id()); } } static const char *to_string(BinaryOperator::BinaryOps op) { switch (op) { #define BINOP(op, str) \ case BinaryOperator::BinaryOps::op: \ return str BINOP(InvalidBinaryOp, "invalid"); BINOP(Add, "add"); BINOP(FAdd, "fadd"); BINOP(Sub, "sub"); BINOP(FSub, "fsub"); BINOP(Mul, "mul"); BINOP(FMul, "fmul"); BINOP(UDiv, "udiv"); BINOP(SDiv, "sdiv"); BINOP(FDiv, "fdiv"); BINOP(URem, "urem"); BINOP(SRem, "srem"); BINOP(FRem, "frem"); BINOP(Shl, "shl"); BINOP(LShr, "lshr"); BINOP(AShr, "ashr"); BINOP(And, "and"); BINOP(Or, "or"); BINOP(Xor, "xor"); } #undef BINOP return "???"; } static const char *to_string(UnaryOperator::UnaryOps op) { switch (op) { case UnaryOperator::UnaryOps::FNeg: return "fneg"; default: return "invalid"; } } static const char *to_string(Instruction::Predicate pred) { switch (pred) { #define PRED(op, str) \ case Instruction::FCMP_##op: \ return str PRED(FALSE, "false"); PRED(OEQ, "oeq"); PRED(OGT, "ogt"); PRED(OGE, "oge"); PRED(OLT, "olt"); PRED(OLE, "ole"); PRED(ONE, "one"); PRED(ORD, "ord"); PRED(UNO, "uno"); PRED(UEQ, "ueq"); PRED(UGT, "ugt"); PRED(UGE, "uge"); PRED(ULT, "ult"); PRED(ULE, "ule"); PRED(UNE, "une"); PRED(TRUE, "true"); #undef PRED #define PRED(op, str) \ case Instruction::ICMP_##op: \ return str PRED(EQ, "eq"); PRED(NE, "ne"); PRED(UGT, "ugt"); PRED(UGE, "uge"); PRED(ULT, "ult"); PRED(ULE, "ule"); PRED(SGT, "sgt"); PRED(SGE, "sge"); PRED(SLT, "slt"); PRED(SLE, "sle"); } #undef PRED return "???"; } static const char *to_string(Instruction::CastOps op) { switch (op) { #define CAST(op, str) \ case Instruction::op: \ return str CAST(Trunc, "trunc"); CAST(ZExt, "zext"); CAST(SExt, "sext"); CAST(FPToUI, "fptoui"); CAST(FPToSI, "fptosi"); CAST(UIToFP, "uitofp"); CAST(SIToFP, "sitofp"); CAST(FPTrunc, "fptrunc"); CAST(FPExt, "fpext"); CAST(PtrToInt, "ptrtoint"); CAST(IntToPtr, "inttoptr"); CAST(BitCast, "bitcast"); CAST(AddrSpaceCast, "addrspacecast"); default: break; } #undef CAST return "???"; } static const char *to_string(AtomicRMWInst::BinOp op) { switch (op) { #define RMW(op, str) \ case AtomicRMWInst::BinOp::op: \ return str RMW(Add, "add"); RMW(Sub, "sub"); RMW(Xchg, "xchg"); RMW(And, "and"); RMW(Xor, "xor"); RMW(Or, "or"); RMW(Nand, "nand"); RMW(Max, "max"); RMW(Min, "min"); RMW(UMax, "umax"); RMW(UMin, "umin"); RMW(FAdd, "fadd"); RMW(FSub, "fsub"); default: break; } #undef RMW return "???"; } void StreamState::append(BinaryOperator *binop, bool decl) { if (decl) { append("%", binop->get_tween_id(), " = ", to_string(binop->getOpcode()), " ", binop->getType(), " ", binop->getOperand(0), ", ", binop->getOperand(1)); } else { append("%", 
binop->get_tween_id()); } } void StreamState::append(UnaryOperator *uop, bool decl) { if (decl) { append("%", uop->get_tween_id(), " = ", to_string(uop->getOpcode()), " ", uop->getType(), " ", uop->getOperand(0), ", ", uop->getOperand(1)); } else { append("%", uop->get_tween_id()); } } void StreamState::append(BasicBlock *bb, bool decl) { if (decl) { newline_noindent(); newline_noindent(); append(bb->get_tween_id(), ":"); for (auto &inst : *bb) { newline(); append(&inst); } } else { append("label %", bb->get_tween_id()); } } void StreamState::append(FCmpInst *value, bool decl) { if (decl) { append("%", value->get_tween_id(), " = fcmp ", to_string(value->getPredicate()), " ", value->getOperand(0), ", ", value->getOperand(1)); } else { append("%", value->get_tween_id()); } } void StreamState::append(ICmpInst *value, bool decl) { if (decl) { append("%", value->get_tween_id(), " = icmp ", to_string(value->getPredicate()), " ", value->getOperand(0), ", ", value->getOperand(1)); } else { append("%", value->get_tween_id()); } } void StreamState::append(BranchInst *br, bool) { append("br "); if (br->getCondition()) append(br->getCondition(), ", ", br->getSuccessor(0), ", ", br->getSuccessor(1)); else append(br->getSuccessor(0)); } void StreamState::append(SwitchInst *branch, bool) { append("switch ", branch->getCondition(), ", ", branch->getDefaultDest()); begin_scope(); for (auto itr = branch->case_begin(); itr != branch->case_end(); ++itr) { newline(); append(itr->getCaseValue(), ", ", itr->getCaseSuccessor()); } end_scope(); } void StreamState::append(CallInst *call, bool decl) { if (decl) { if (call->getType()->getTypeID() != Type::TypeID::VoidTyID) append("%", call->get_tween_id(), " = "); append("call ", call->getType(), " @", call->getCalledFunction()->getName(), "("); for (unsigned i = 0; i < call->getNumOperands(); i++) { append(call->getOperand(i)); if (i + 1 < call->getNumOperands()) append(", "); } append(")"); for (auto itr = call->metadata_begin(); itr != call->metadata_end(); ++itr) { append(" !", itr->first, " ", itr->second); } } else { append("%", call->get_tween_id()); } } void StreamState::append(CastInst *cast, bool decl) { if (decl) { append("%", cast->get_tween_id(), " = ", to_string(cast->getOpcode()), " ", cast->getOperand(0), " to ", cast->getType()); } else { append("%", cast->get_tween_id()); } } void StreamState::append(SelectInst *cast, bool decl) { if (decl) { append("%", cast->get_tween_id(), " = ", "select ", cast->getOperand(0), ", ", cast->getOperand(1), ", ", cast->getOperand(2)); } else { append("%", cast->get_tween_id()); } } void StreamState::append(ExtractValueInst *ext, bool decl) { if (decl) { append("%", ext->get_tween_id(), " = ", "extractvalue ", ext->getType(), " ", ext->getAggregateOperand()); for (unsigned i = 0; i < ext->getNumIndices(); i++) { append(", "); append(ext->getIndices()[i]); } } else { append("%", ext->get_tween_id()); } } void StreamState::append(AllocaInst *alloca, bool decl) { if (decl) { append("%", alloca->get_tween_id(), " = alloca ", cast(alloca->getType())->getElementType()); } else { append("%", alloca->get_tween_id()); } } void StreamState::append(GetElementPtrInst *ptr, bool decl) { if (decl) { append("%", ptr->get_tween_id(), " = getelementptr ", ptr->isInBounds() ? 
"inbounds " : "", ptr->getType()); for (unsigned i = 0; i < ptr->getNumOperands(); i++) { append(", "); append(ptr->getOperand(i)); } } else { append("%", ptr->get_tween_id()); } } void StreamState::append(LoadInst *ptr, bool decl) { if (decl) append("%", ptr->get_tween_id(), " = load ", ptr->getType(), " ", ptr->getPointerOperand()); else append("%", ptr->get_tween_id()); } void StreamState::append(StoreInst *ptr, bool decl) { if (decl) append("store ", ptr->getOperand(0), ", ", ptr->getOperand(1)); else append("%", ptr->get_tween_id()); } void StreamState::append(AtomicRMWInst *atomic_op, bool decl) { if (decl) { append("%", atomic_op->get_tween_id(), " = atomicrmw ", to_string(atomic_op->getOperation()), " ", atomic_op->getType(), " ", atomic_op->getPointerOperand(), ", ", atomic_op->getValOperand()); } else append("%", atomic_op->get_tween_id()); } void StreamState::append(AtomicCmpXchgInst *xchg, bool decl) { if (decl) { append("%", xchg->get_tween_id(), " = cmpxchg ", xchg->getType(), " ", xchg->getPointerOperand(), ", ", xchg->getCompareOperand(), ", ", xchg->getNewValOperand()); } else append("%", xchg->get_tween_id()); } void StreamState::append(ConstantAggregate *agg, bool) { append("["); if (agg->getNumOperands()) append(agg->getOperand(0)); for (unsigned i = 1; i < agg->getNumOperands(); i++) append(", ", agg->getOperand(i)); append("]"); } void StreamState::append(ConstantAggregateZero *zero, bool) { append("[zeroinitialized]"); } void StreamState::append(ConstantDataArray *arr, bool) { append("["); for (unsigned i = 0; i < arr->getNumElements(); i++) { append(arr->getElementAsConstant(i)); if (i + 1 < arr->getNumElements()) append(", "); } append("]"); } void StreamState::append(ConstantDataVector *vec, bool) { append("<"); for (unsigned i = 0; i < vec->getNumElements(); i++) { append(vec->getElementAsConstant(i)); if (i + 1 < vec->getNumElements()) append(", "); } append(">"); } void StreamState::append(ConstantExpr *expr, bool decl) { if (decl) { append("%", expr->get_tween_id(), " = ", expr->getOpcode(), " ", expr->getType()); if (expr->getNumOperands()) append(" ", expr->getOperand(0)); for (unsigned i = 1; i < expr->getNumOperands(); i++) append(", ", expr->getOperand(i)); } else { append("%", expr->get_tween_id()); } } void StreamState::append(PHINode *phi, bool decl) { if (decl) { append("%", phi->get_tween_id(), " = phi ", phi->getType(), " "); unsigned count = phi->getNumIncomingValues(); for (unsigned i = 0; i < count; i++) { Value *value = phi->getIncomingValue(i); BasicBlock *bb = phi->getIncomingBlock(i); append("[ ", value, ", ", bb, " ]"); if (i + 1 < count) append(", "); } } else { append("%", phi->get_tween_id()); } } void StreamState::append(ReturnInst *value, bool) { if (value->getReturnValue()) append("ret ", value); else append("ret void"); } void StreamState::append(UndefValue *undef, bool decl) { append(undef->getType(), " undef"); } void StreamState::append(ConstantFP *value, bool decl) { append(value->getValueAPF().convertToDouble()); } void StreamState::append(ConstantInt *value, bool decl) { append(value->getType(), " ", value->getUniqueInteger().getSExtValue()); } void StreamState::append(Constant *value, bool decl) { append(static_cast(value), decl); } void StreamState::append(Instruction *inst) { append(static_cast(inst), true); } void StreamState::append(MDNode *md, bool decl) { if (md) { if (decl) { append("!", md->get_tween_id(), " = !{"); for (unsigned i = 0; i < md->getNumOperands(); i++) { append(&md->getOperand(i)); if (i + 1 < 
md->getNumOperands()) append(", "); } append("}"); } else append("!", md->get_tween_id()); } else append("null"); } void StreamState::append(NamedMDNode *md) { append("!", md->getName(), " = !{"); for (unsigned i = 0; i < md->getNumOperands(); i++) { append(md->getOperand(i), false); if (i + 1 < md->getNumOperands()) append(", "); } append("}"); } void StreamState::append(MDOperand *md) { if (md) { switch (md->get_metadata_kind()) { case MetadataKind::NamedNode: return append(cast(md)); case MetadataKind::Node: return append(cast(md), false); case MetadataKind::Constant: return append(cast(md)->getValue()); case MetadataKind::String: return append("\"", cast(md)->getString(), "\""); case MetadataKind::None: return append("null"); default: LOGE("Unknown MetadataKind %u.\n", unsigned(md->get_metadata_kind())); break; } } else append("null"); } void StreamState::append(Value *value, bool decl) { switch (value->get_value_kind()) { case ValueKind::Argument: return append(cast(value), decl); case ValueKind::Function: return append(cast(value), decl); case ValueKind::BinaryOperator: return append(cast(value), decl); case ValueKind::UnaryOperator: return append(cast(value), decl); case ValueKind::Call: return append(cast(value), decl); case ValueKind::Branch: return append(cast(value), decl); case ValueKind::FCmp: return append(cast(value), decl); case ValueKind::ICmp: return append(cast(value), decl); case ValueKind::Return: return append(cast(value), decl); case ValueKind::Undef: return append(cast(value), decl); case ValueKind::ConstantInt: return append(cast(value), decl); case ValueKind::ConstantFP: return append(cast(value), decl); case ValueKind::BasicBlock: return append(cast(value), decl); case ValueKind::PHI: return append(cast(value), decl); case ValueKind::Cast: return append(cast(value), decl); case ValueKind::Select: return append(cast(value), decl); case ValueKind::ExtractValue: return append(cast(value), decl); case ValueKind::Alloca: return append(cast(value), decl); case ValueKind::GetElementPtr: return append(cast(value), decl); case ValueKind::Load: return append(cast(value), decl); case ValueKind::Store: return append(cast(value), decl); case ValueKind::AtomicRMW: return append(cast(value), decl); case ValueKind::AtomicCmpXchg: return append(cast(value), decl); case ValueKind::Global: return append(cast(value), decl); case ValueKind::ConstantAggregate: return append(cast(value), decl); case ValueKind::ConstantAggregateZero: return append(cast(value), decl); case ValueKind::ConstantDataArray: return append(cast(value), decl); case ValueKind::ConstantDataVector: return append(cast(value), decl); case ValueKind::ConstantExpr: return append(cast(value), decl); case ValueKind::Switch: return append(cast(value), decl); case ValueKind::ShuffleVector: return append(cast(value), decl); case ValueKind::ExtractElement: return append(cast(value), decl); case ValueKind::InsertElement: return append(cast(value), decl); default: break; } LOGE("Unknown ValueKind %u.\n", unsigned(value->get_value_kind())); if (decl) append("%", value->get_tween_id(), " = unimplemented"); else append("%", value->get_tween_id()); } bool disassemble(Module &module, String &str) { StreamState state; for (auto itr = module.global_begin(); itr != module.global_end(); ++itr) state.append(&*itr, true); for (auto *func : module) { state.newline(); state.append(func, true); state.newline(); } state.newline(); for (auto itr = module.named_metadata_begin(); itr != module.named_metadata_end(); ++itr) { state.newline(); 
state.append(itr->second); } state.newline(); for (auto itr = module.unnamed_metadata_begin(); itr != module.unnamed_metadata_end(); ++itr) { state.newline(); state.append(*itr, true); } str = state.stream.str(); return true; } } // namespace LLVMBC ================================================ FILE: bc/function.cpp ================================================ /* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "function.hpp" #include "context.hpp" #include "instruction.hpp" #include "module.hpp" #include "type.hpp" #include #include namespace LLVMBC { Function::Function(FunctionType *function_type_, uint64_t value_id_, Module &module_) : Constant(function_type_, ValueKind::Function) , module(module_) , value_id(value_id_) , function_type(function_type_) { } const String &Function::getName() const { return module.get_value_name(value_id); } void Function::set_basic_blocks(Vector basic_blocks_) { basic_blocks = std::move(basic_blocks_); } FunctionType *Function::getFunctionType() const { return function_type; } IteratorAdaptor::const_iterator> Function::begin() const { return basic_blocks.begin(); } IteratorAdaptor::const_iterator> Function::end() const { return basic_blocks.end(); } BasicBlock &Function::getEntryBlock() const { return *basic_blocks.front(); } void Function::add_argument(Argument *arg) { arguments.push_back(arg); } String Attribute::getValueAsString() const { // LLVM implementation does this. 
if (value) return *value; else return {}; } Attribute::Attribute(const String *value_) : value(value_) { } Attribute Function::getFnAttribute(const char *attribute) const { for (auto &attr : attributes) if (attr.first == attribute) return Attribute(&attr.second); return Attribute(nullptr); } bool Function::hasFnAttribute(const char *attribute) const { for (auto &attr : attributes) if (attr.first == attribute) return true; return false; } void Function::set_attributes(Vector> attributes_) { attributes = std::move(attributes_); } void Function::set_structured_control_flow() { structured_control_flow = true; } bool Function::get_structured_control_flow() const { return structured_control_flow; } IteratorAdaptor::const_iterator> Function::arg_begin() const { return arguments.begin(); } IteratorAdaptor::const_iterator> Function::arg_end() const { return arguments.end(); } BasicBlock::BasicBlock(LLVMContext &context_) : Value(Type::getLabelTy(context_), ValueKind::BasicBlock) { } void BasicBlock::add_instruction(Instruction *inst) { instructions.push_back(inst); } Instruction *BasicBlock::getTerminator() const { if (!instructions.empty() && instructions.back()->isTerminator()) return instructions.back(); else return nullptr; } void BasicBlock::add_successor(BasicBlock *succ) { if (std::find(succs.begin(), succs.end(), succ) == succs.end()) succs.push_back(succ); } BasicBlock::Merge BasicBlock::get_merge() const { return merge; } BasicBlock *BasicBlock::get_merge_bb() const { return merge_bb; } BasicBlock *BasicBlock::get_continue_bb() const { return continue_bb; } void BasicBlock::set_selection_merge(BasicBlock *bb) { merge = Merge::Selection; merge_bb = bb; } void BasicBlock::set_loop_merge(BasicBlock *merge_bb_, BasicBlock *continue_bb_) { merge = Merge::Loop; merge_bb = merge_bb_; continue_bb = continue_bb_; } IteratorAdaptor::const_iterator> BasicBlock::begin() const { return instructions.begin(); } IteratorAdaptor::const_iterator> BasicBlock::end() const { return instructions.end(); } Vector::const_iterator BasicBlock::succ_begin() const { return succs.begin(); } Vector::const_iterator BasicBlock::succ_end() const { return succs.end(); } } // namespace LLVMBC ================================================ FILE: bc/function.hpp ================================================ /* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #pragma once #include "iterator.hpp" #include "value.hpp" namespace LLVMBC { class LLVMContext; class Instruction; class Module; class FunctionType; class BasicBlock : public Value { public: static constexpr ValueKind get_value_kind() { return ValueKind::BasicBlock; } explicit BasicBlock(LLVMContext &context); void add_instruction(Instruction *inst); Instruction *getTerminator() const; IteratorAdaptor::const_iterator> begin() const; IteratorAdaptor::const_iterator> end() const; void add_successor(BasicBlock *succ); enum class Merge { None, Selection, Loop }; Merge get_merge() const; void set_selection_merge(BasicBlock *bb); void set_loop_merge(BasicBlock *merge_bb, BasicBlock *continue_bb); BasicBlock *get_merge_bb() const; BasicBlock *get_continue_bb() const; Vector::const_iterator succ_begin() const; Vector::const_iterator succ_end() const; LLVMBC_DEFAULT_VALUE_KIND_IMPL private: Vector instructions; Vector succs; Merge merge = Merge::None; BasicBlock *merge_bb = nullptr; BasicBlock *continue_bb = nullptr; }; inline Vector::const_iterator succ_begin(const BasicBlock *bb) { return bb->succ_begin(); } inline Vector::const_iterator succ_end(const BasicBlock *bb) { return bb->succ_end(); } class Attribute { public: explicit Attribute(const String *value); String getValueAsString() const; private: const String *value; }; class Function : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::Function; } explicit Function(FunctionType *function_type, uint64_t value_id, Module &module); const String &getName() const; void set_basic_blocks(Vector basic_blocks); IteratorAdaptor::const_iterator> begin() const; IteratorAdaptor::const_iterator> end() const; FunctionType *getFunctionType() const; BasicBlock &getEntryBlock() const; void add_argument(Argument *arg); IteratorAdaptor::const_iterator> arg_begin() const; IteratorAdaptor::const_iterator> arg_end() const; // Bare bones implementation, we only need it for fp32-denorm-mode attribute. Attribute getFnAttribute(const char *attribute) const; bool hasFnAttribute(const char *attribute) const; void set_attributes(Vector> attributes); bool get_structured_control_flow() const; void set_structured_control_flow(); LLVMBC_DEFAULT_VALUE_KIND_IMPL private: Module &module; uint64_t value_id; FunctionType *function_type; Vector basic_blocks; Vector arguments; Vector> attributes; bool structured_control_flow = false; }; } // namespace LLVMBC ================================================ FILE: bc/instruction.cpp ================================================ /* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "instruction.hpp" #include "cast.hpp" #include namespace LLVMBC { Instruction::Instruction(Type *type, ValueKind kind) : Value(type, kind) { } void Instruction::set_operands(Vector op) { operands = std::move(op); } unsigned Instruction::getNumOperands() const { return operands.size(); } Value *Instruction::getOperand(unsigned index) const { if (index >= operands.size()) { LOGE("Operand index is out of range.\n"); return nullptr; } return Internal::resolve_proxy(operands[index]); } bool Instruction::isTerminator() const { return is_terminator; } void Instruction::set_terminator() { is_terminator = true; } bool Instruction::resolve_proxy_values() { for (auto &op : operands) while (op && op->get_value_kind() == ValueKind::Proxy) op = cast(op)->get_proxy_value(); if (get_value_kind() == ValueKind::PHI) { auto *phi = cast(this); if (!phi->resolve_proxy_values_incoming()) return false; } return true; } void Instruction::setMetadata(const String &str, MDNode *node) { attachments[str] = node; } UnorderedMap::const_iterator Instruction::metadata_begin() const { return attachments.begin(); } UnorderedMap::const_iterator Instruction::metadata_end() const { return attachments.end(); } bool Instruction::hasMetadata(const String &str) const { return attachments.find(str) != attachments.end(); } MDNode *Instruction::getMetadata(const String &str) const { auto itr = attachments.find(str); if (itr != attachments.end()) return itr->second; else return nullptr; } bool Instruction::is_base_of_value_kind(ValueKind kind) { switch (kind) { case ValueKind::Return: case ValueKind::Unreachable: case ValueKind::Call: case ValueKind::UnaryOperator: case ValueKind::BinaryOperator: case ValueKind::Cast: case ValueKind::Select: case ValueKind::ExtractValue: case ValueKind::Alloca: case ValueKind::GetElementPtr: case ValueKind::Load: case ValueKind::Store: case ValueKind::CompareBase: case ValueKind::FCmp: case ValueKind::ICmp: case ValueKind::Branch: case ValueKind::Switch: case ValueKind::PHI: case ValueKind::AtomicRMW: case ValueKind::AtomicCmpXchg: case ValueKind::ShuffleVector: case ValueKind::ExtractElement: case ValueKind::InsertElement: return true; default: break; } return false; } BinaryOperator::BinaryOperator(Value *LHS, Value *RHS, BinaryOps op_) : Instruction(LHS->getType(), ValueKind::BinaryOperator) , op(op_) { set_operands({ LHS, RHS }); } BinaryOperator::BinaryOps BinaryOperator::getOpcode() const { return op; } bool BinaryOperator::isFast() const { return fast_math; } void BinaryOperator::setFast(bool enabled) { fast_math = enabled; } UnaryOperator::UnaryOperator(UnaryOps uop, Value *value) : Instruction(value->getType(), ValueKind::UnaryOperator), op(uop) { set_operands({ value }); } UnaryOperator::UnaryOps UnaryOperator::getOpcode() const { return op; } ReturnInst::ReturnInst(Value *value_) : Instruction(value_ ? 
value_->getType() : nullptr, ValueKind::Return) , value(value_) { set_terminator(); } UnreachableInst::UnreachableInst() : Instruction(nullptr, ValueKind::Unreachable) { set_terminator(); } CallInst::CallInst(FunctionType *function_type_, Function *callee_, Vector params) : Instruction(function_type_->getReturnType(), ValueKind::Call) , callee(callee_) { set_operands(std::move(params)); } Function *CallInst::getCalledFunction() const { return callee; } Value *ReturnInst::getReturnValue() const { return Internal::resolve_proxy(value); } CmpInst::CmpInst(ValueKind kind, Predicate pred_, Value *LHS, Value *RHS) : Instruction(Type::getInt1Ty(LHS->getType()->getContext()), kind) , pred(pred_) { set_operands({ LHS, RHS }); } CastInst::CastInst(Type *type, Value *value, Instruction::CastOps op_) : Instruction(type, ValueKind::Cast) , op(op_) { set_operands({ value }); } SelectInst::SelectInst(Value *true_value, Value *false_value, Value *cond) : Instruction(true_value->getType(), ValueKind::Select) { set_operands({ cond, true_value, false_value }); } ExtractValueInst::ExtractValueInst(Type *type, Value *aggregate, Vector indices_) : Instruction(type, ValueKind::ExtractValue) , indices(std::move(indices_)) { set_operands({ aggregate }); } Value *ExtractValueInst::getAggregateOperand() const { return Internal::resolve_proxy(operands[0]); } unsigned ExtractValueInst::getNumIndices() const { return indices.size(); } const unsigned *ExtractValueInst::getIndices() const { return indices.data(); } Instruction::CastOps CastInst::getOpcode() const { return op; } Instruction::Predicate CmpInst::getPredicate() const { return pred; } bool CmpInst::is_base_of_value_kind(ValueKind kind) { return kind == ValueKind::ICmp || kind == ValueKind::FCmp; } FCmpInst::FCmpInst(Predicate pred_, Value *LHS, Value *RHS) : CmpInst(ValueKind::FCmp, pred_, LHS, RHS) { set_operands({ LHS, RHS }); } ICmpInst::ICmpInst(Predicate pred_, Value *LHS, Value *RHS) : CmpInst(ValueKind::ICmp, pred_, LHS, RHS) { set_operands({ LHS, RHS }); } BranchInst::BranchInst(BasicBlock *true_block, BasicBlock *false_block, Value *cond_) : Instruction(nullptr, ValueKind::Branch) , cond(cond_) { set_terminator(); num_blocks = 2; bbs[0] = true_block; bbs[1] = false_block; } BranchInst::BranchInst(BasicBlock *true_block) : Instruction(nullptr, ValueKind::Branch) { set_terminator(); num_blocks = 1; bbs[0] = true_block; } bool BranchInst::isConditional() const { return cond != nullptr; } Value *BranchInst::getCondition() const { return Internal::resolve_proxy(cond); } BasicBlock *BranchInst::getSuccessor(unsigned index) const { assert(index < num_blocks); return bbs[index]; } unsigned BranchInst::getNumSuccessors() const { return num_blocks; } SwitchInst::SwitchInst(Value *cond_, BasicBlock *default_block_, unsigned num_cases) : Instruction(Type::getVoidTy(cond_->getType()->getContext()), ValueKind::Switch) , cond(cond_) , default_block(default_block_) { set_terminator(); cases.reserve(num_cases); } void SwitchInst::addCase(Value *case_value, BasicBlock *bb) { cases.push_back({ case_value, bb }); } Vector::const_iterator SwitchInst::case_begin() const { return cases.begin(); } Vector::const_iterator SwitchInst::case_end() const { return cases.end(); } BasicBlock *SwitchInst::getDefaultDest() const { return default_block; } Value *SwitchInst::getCondition() const { return Internal::resolve_proxy(cond); } ConstantInt *SwitchInst::Case::getCaseValue() const { return cast(value); } BasicBlock *SwitchInst::Case::getCaseSuccessor() const { return bb; } 
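// Illustrative note (not part of the upstream file): while the bitcode is
// being parsed, incoming PHI operands may still be ValueProxy forward
// references. PHINode therefore stores raw (value, basic block) pairs via
// add_incoming() and patches them later, e.g.:
//
//     phi->add_incoming(maybe_proxy_value, pred_bb);  // during parsing
//     phi->resolve_proxy_values_incoming();           // once all values exist
//
// getIncomingValue() additionally resolves proxies on every lookup, so callers
// never observe a ValueProxy.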
PHINode::PHINode(Type *type, size_t num_edges) : Instruction(type, ValueKind::PHI) { incoming.reserve(num_edges); } void PHINode::add_incoming(Value *value, BasicBlock *bb) { incoming.push_back({ value, bb }); } unsigned PHINode::getNumIncomingValues() const { return unsigned(incoming.size()); } AllocaInst::AllocaInst(Type *pointer_type, Type *element_type_, Value *size) : Instruction(pointer_type, ValueKind::Alloca) , array_size(size) { } Value *AllocaInst::getArraySize() const { return Internal::resolve_proxy(array_size); } GetElementPtrInst::GetElementPtrInst(Type *pointer_type, Vector indices, bool inbounds_) : Instruction(pointer_type, ValueKind::GetElementPtr) , inbounds(inbounds_) { set_operands(std::move(indices)); } bool GetElementPtrInst::isInBounds() const { return inbounds; } LoadInst::LoadInst(Type *type, Value *ptr) : Instruction(type, ValueKind::Load) { set_operands({ ptr }); } Value *LoadInst::getPointerOperand() const { return getOperand(0); } StoreInst::StoreInst(Value *ptr, Value *value) : Instruction(Type::getVoidTy(ptr->getType()->getContext()), ValueKind::Store) { set_operands({ value, ptr }); } BasicBlock *PHINode::getIncomingBlock(unsigned index) const { if (index >= incoming.size()) return nullptr; return incoming[index].bb; } Value *PHINode::getIncomingValue(unsigned index) const { if (index >= incoming.size()) return nullptr; return Internal::resolve_proxy(incoming[index].value); } bool PHINode::resolve_proxy_values_incoming() { for (auto &node : incoming) { while (node.value && node.value->get_value_kind() == ValueKind::Proxy) { node.value = cast(node.value)->get_proxy_value(); if (!node.value) return false; } } return true; } AtomicRMWInst::AtomicRMWInst(Type *type, Value *ptr_, Value *value_, BinOp op_) : Instruction(type, ValueKind::AtomicRMW) , ptr(ptr_) , value(value_) , op(op_) { set_operands({ ptr, value }); } Value *AtomicRMWInst::getPointerOperand() const { return Internal::resolve_proxy(ptr); } Value *AtomicRMWInst::getValOperand() const { return Internal::resolve_proxy(value); } AtomicRMWInst::BinOp AtomicRMWInst::getOperation() const { return op; } AtomicCmpXchgInst::AtomicCmpXchgInst(Value *ptr_, Value *cmp_, Value *new_value_, Type *type_override) : Instruction(type_override ? 
AtomicCmpXchgInst::AtomicCmpXchgInst(Value *ptr_, Value *cmp_, Value *new_value_, Type *type_override)
    : Instruction(type_override ?
                      type_override :
                      StructType::get(new_value_->getType()->getContext(),
                                      { new_value_->getType(),
                                        Type::getInt1Ty(new_value_->getType()->getContext()) }),
                  ValueKind::AtomicCmpXchg)
    , ptr(ptr_)
    , new_value(new_value_)
    , cmp_value(cmp_)
{
    set_operands({ ptr, new_value, cmp_value });
}

Value *AtomicCmpXchgInst::getPointerOperand() const { return Internal::resolve_proxy(ptr); }
Value *AtomicCmpXchgInst::getCompareOperand() const { return Internal::resolve_proxy(cmp_value); }
Value *AtomicCmpXchgInst::getNewValOperand() const { return Internal::resolve_proxy(new_value); }
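// As in upstream LLVM, a cmpxchg without a type override yields a { T, i1 }
// aggregate: the original memory value plus a success flag. A consumer would
// typically peel the payload off with ExtractValueInst, e.g. (sketch, where
// value_type and cmpxchg_inst stand in for the caller's objects):
//   auto *loaded = context->construct<ExtractValueInst>(
//       value_type, cmpxchg_inst, Vector<unsigned>{ 0u });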
ShuffleVectorInst::ShuffleVectorInst(Type *type, Value *a, Value *b, Value *shuf)
    : Instruction(type, ValueKind::ShuffleVector)
{
    set_operands({ a, b });
    auto *masks = cast<ConstantDataVector>(shuf);
    shuffle_mask.reserve(masks->getNumElements());
    for (unsigned i = 0; i < masks->getNumElements(); i++)
        shuffle_mask.push_back(cast<ConstantInt>(masks->getElementAsConstant(i))->getUniqueInteger().getSExtValue());
}

int ShuffleVectorInst::getMaskValue(unsigned index) const
{
    assert(index < shuffle_mask.size());
    return shuffle_mask[index];
}

ExtractElementInst::ExtractElementInst(Value *vec_, Value *index_)
    : Instruction(cast<VectorType>(vec_->getType())->getElementType(), ValueKind::ExtractElement)
    , vec(vec_)
    , index(index_)
{
    set_operands({ vec, index });
}

Value *ExtractElementInst::getVectorOperand() const { return Internal::resolve_proxy(vec); }
Value *ExtractElementInst::getIndexOperand() const { return Internal::resolve_proxy(index); }

InsertElementInst::InsertElementInst(Value *vec, Value *value, Value *index)
    : Instruction(vec->getType(), ValueKind::InsertElement)
{
    set_operands({ vec, value, index });
}

CompositeConstructInst::CompositeConstructInst(Type *type, Vector<Value *> constituents)
    : Instruction(type, ValueKind::CompositeConstruct)
{
    set_operands(std::move(constituents));
}
} // namespace LLVMBC

================================================
FILE: bc/instruction.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#pragma once
#include "value.hpp"

namespace LLVMBC
{
class FunctionType;
class Function;
class BasicBlock;
class MDNode;

class Instruction : public Value
{
public:
    Instruction(Type *type, ValueKind kind);

    enum Predicate
    {
        FCMP_FALSE = 0, FCMP_OEQ = 1, FCMP_OGT = 2, FCMP_OGE = 3,
        FCMP_OLT = 4, FCMP_OLE = 5, FCMP_ONE = 6, FCMP_ORD = 7,
        FCMP_UNO = 8, FCMP_UEQ = 9, FCMP_UGT = 10, FCMP_UGE = 11,
        FCMP_ULT = 12, FCMP_ULE = 13, FCMP_UNE = 14, FCMP_TRUE = 15,
        ICMP_EQ = 32, ICMP_NE = 33, ICMP_UGT = 34, ICMP_UGE = 35,
        ICMP_ULT = 36, ICMP_ULE = 37, ICMP_SGT = 38, ICMP_SGE = 39,
        ICMP_SLT = 40, ICMP_SLE = 41
    };

    enum CastOps
    {
        InvalidCastOp = 100,
        Trunc, ZExt, SExt, FPToUI, FPToSI, UIToFP, SIToFP,
        FPTrunc, FPExt, PtrToInt, IntToPtr, BitCast, AddrSpaceCast
    };

    enum GEPOps
    {
        GetElementPtr = 200
    };

    enum BinaryOps
    {
        InvalidBinaryOp = 300,
        Add, FAdd, Sub, FSub, Mul, FMul, UDiv, SDiv, FDiv,
        URem, SRem, FRem, Shl, LShr, AShr, And, Or, Xor
    };

    bool isTerminator() const;
    Value *getOperand(unsigned index) const;
    unsigned getNumOperands() const;
    bool resolve_proxy_values();

    MDNode *getMetadata(const String &str) const;
    bool hasMetadata(const String &str) const;
    void setMetadata(const String &str, MDNode *node);
    UnorderedMap<String, MDNode *>::const_iterator metadata_begin() const;
    UnorderedMap<String, MDNode *>::const_iterator metadata_end() const;

    static bool is_base_of_value_kind(ValueKind kind);
    static constexpr ValueKind get_value_kind() { return ValueKind::InstructionBase; }

protected:
    void set_terminator();
    bool is_terminator = false;
    void set_operands(Vector<Value *> op);
    Vector<Value *> operands;
    UnorderedMap<String, MDNode *> attachments;
};
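// Metadata attachments are keyed by their kind string. A consumer might probe
// for a DXIL-style attachment like so (sketch; the kind name and the
// mark_non_uniform helper are illustrative only):
//   if (MDNode *node = inst->getMetadata("dx.nonuniform"))
//       mark_non_uniform(inst, node);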
class ReturnInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Return; }
    explicit ReturnInst(Value *value);
    Value *getReturnValue() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Value *value;
};

class UnreachableInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Unreachable; }
    UnreachableInst();
    LLVMBC_DEFAULT_VALUE_KIND_IMPL
};

class CallInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Call; }
    CallInst(FunctionType *function_type, Function *callee, Vector<Value *> params);
    Function *getCalledFunction() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Function *callee;
};

class UnaryOperator : public Instruction
{
public:
    enum class UnaryOps
    {
        Invalid,
        FNeg,
        INeg, // custom extension
    };

    static constexpr ValueKind get_value_kind() { return ValueKind::UnaryOperator; }
    UnaryOperator(UnaryOps uop, Value *value);
    UnaryOps getOpcode() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    UnaryOps op;
};

class BinaryOperator : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::BinaryOperator; }
    BinaryOperator(Value *LHS, Value *RHS, BinaryOps op);
    BinaryOps getOpcode() const;
    void setFast(bool enabled);
    bool isFast() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    BinaryOps op;
    bool fast_math = false;
};

class CastInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Cast; }
    CastInst(Type *type, Value *value, Instruction::CastOps op);
    Instruction::CastOps getOpcode() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Instruction::CastOps op;
};

class SelectInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Select; }
    SelectInst(Value *true_value, Value *false_value, Value *cond);
    LLVMBC_DEFAULT_VALUE_KIND_IMPL
};

class ExtractValueInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::ExtractValue; }
    ExtractValueInst(Type *type, Value *aggregate, Vector<unsigned> indices);
    Value *getAggregateOperand() const;
    unsigned getNumIndices() const;
    const unsigned *getIndices() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Vector<unsigned> indices;
};

class AllocaInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Alloca; }
    AllocaInst(Type *pointer_type, Type *element_type, Value *size);
    Value *getArraySize() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Value *array_size;
};

class GetElementPtrInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::GetElementPtr; }
    GetElementPtrInst(Type *pointer_type, Vector<Value *> arguments, bool inbounds);
    bool isInBounds() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    bool inbounds;
};

class LoadInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Load; }
    LoadInst(Type *type, Value *ptr);
    Value *getPointerOperand() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL
};

class StoreInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Store; }
    StoreInst(Value *ptr, Value *value);
    LLVMBC_DEFAULT_VALUE_KIND_IMPL
};

class CmpInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::CompareBase; }
    CmpInst(ValueKind kind, Predicate pred, Value *LHS, Value *RHS);
    Predicate getPredicate() const;
    static bool is_base_of_value_kind(ValueKind kind);

private:
    Predicate pred;
};

class FCmpInst : public CmpInst
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::FCmp; }
    FCmpInst(Predicate pred, Value *LHS, Value *RHS);
    LLVMBC_DEFAULT_VALUE_KIND_IMPL
};

class ICmpInst : public CmpInst
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::ICmp; }
    ICmpInst(Predicate pred, Value *LHS, Value *RHS);
    LLVMBC_DEFAULT_VALUE_KIND_IMPL
};

class BranchInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Branch; }
    BranchInst(BasicBlock *true_block, BasicBlock *false_block, Value *cond);
    explicit BranchInst(BasicBlock *true_block);
    bool isConditional() const;
    Value *getCondition() const;
    unsigned getNumSuccessors() const;
    BasicBlock *getSuccessor(unsigned index) const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    BasicBlock *bbs[2] = {};
    unsigned num_blocks = 0;
    Value *cond = nullptr;
};

class SwitchInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::Switch; }
    SwitchInst(Value *cond, BasicBlock *default_block, unsigned num_cases);
    void addCase(Value *case_value, BasicBlock *bb);

    struct Case
    {
        Value *value;
        BasicBlock *bb;
        BasicBlock *getCaseSuccessor() const;
        ConstantInt *getCaseValue() const;
    };

    Vector<Case>::const_iterator case_begin() const;
    Vector<Case>::const_iterator case_end() const;
    Value *getCondition() const;
    BasicBlock *getDefaultDest() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Value *cond;
    BasicBlock *default_block;
    Vector<Case> cases;
};
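// LLVMBC_DEFAULT_VALUE_KIND_IMPL (see value.hpp) wires each subclass into the
// custom RTTI from bc/cast.hpp, which dispatches on get_value_kind(). Typical
// dispatch over a terminator (sketch; the handlers are hypothetical):
//   if (auto *branch = dyn_cast<BranchInst>(terminator))
//       handle_branch(branch);
//   else if (auto *sw = dyn_cast<SwitchInst>(terminator))
//       handle_switch(sw);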
class PHINode : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::PHI; }
    PHINode(Type *type, size_t num_edges);
    unsigned getNumIncomingValues() const;
    Value *getIncomingValue(unsigned index) const;
    BasicBlock *getIncomingBlock(unsigned index) const;
    void add_incoming(Value *value, BasicBlock *bb);
    bool resolve_proxy_values_incoming();
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    struct Incoming
    {
        Value *value;
        BasicBlock *bb;
    };
    Vector<Incoming> incoming;
};

class AtomicRMWInst : public Instruction
{
public:
    enum class BinOp
    {
        Invalid,
        Xchg, Add, Sub, And, Nand, Or, Xor,
        Max, Min, UMax, UMin,
        FAdd, // wat
        FSub
    };

    static constexpr ValueKind get_value_kind() { return ValueKind::AtomicRMW; }
    AtomicRMWInst(Type *type, Value *ptr, Value *value, BinOp op);
    Value *getPointerOperand() const;
    Value *getValOperand() const;
    BinOp getOperation() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Value *ptr;
    Value *value;
    BinOp op;
};

class AtomicCmpXchgInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::AtomicCmpXchg; }
    AtomicCmpXchgInst(Value *ptr, Value *cmp, Value *new_value, Type *type_override = nullptr);
    Value *getPointerOperand() const;
    Value *getNewValOperand() const;
    Value *getCompareOperand() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Value *ptr;
    Value *new_value;
    Value *cmp_value;
};

class ShuffleVectorInst : public Instruction
{
public:
    ShuffleVectorInst(Type *type, Value *a, Value *b, Value *shuf);
    static constexpr ValueKind get_value_kind() { return ValueKind::ShuffleVector; }
    int getMaskValue(unsigned index) const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Vector<int> shuffle_mask;
};

class ExtractElementInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::ExtractElement; }
    ExtractElementInst(Value *vec, Value *offset);
    Value *getVectorOperand() const;
    Value *getIndexOperand() const;
    LLVMBC_DEFAULT_VALUE_KIND_IMPL

private:
    Value *vec;
    Value *index;
};

class InsertElementInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::InsertElement; }
    InsertElementInst(Value *vec, Value *value, Value *index);
    LLVMBC_DEFAULT_VALUE_KIND_IMPL
};

// Extension of LLVM to better map to SPIR-V / DXBC-IR
class CompositeConstructInst : public Instruction
{
public:
    static constexpr ValueKind get_value_kind() { return ValueKind::CompositeConstruct; }
    CompositeConstructInst(Type *type, Vector<Value *> constituents);
    LLVMBC_DEFAULT_VALUE_KIND_IMPL
};
} // namespace LLVMBC

================================================
FILE: bc/iterator.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#pragma once
#include <stddef.h>

namespace LLVMBC
{
// An iterator adaptor which lets us receive reference types instead of pointer types.
template <typename T, typename Iter>
struct IteratorAdaptor
{
    IteratorAdaptor(Iter iter_)
        : iter(iter_)
    {
    }

    T &operator*() { return **iter; }
    T *operator->() { return *iter; }

    IteratorAdaptor operator++()
    {
        ++iter;
        return *this;
    }

    bool operator==(const IteratorAdaptor &other) const { return iter == other.iter; }
    bool operator!=(const IteratorAdaptor &other) const { return !(*this == other); }
    ptrdiff_t operator-(const IteratorAdaptor &other) const { return iter - other.iter; }

    Iter iter;
};
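// With this adaptor, containers of Instruction pointers iterate as references,
// which is what makes loops like the one in module.cpp read naturally:
//   for (auto &inst : *bb)          // Instruction &, not Instruction *
//       inst.resolve_proxy_values();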
} // namespace LLVMBC

================================================
FILE: bc/metadata.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "metadata.hpp"
#include "module.hpp"
#include "value.hpp"
#include <assert.h>
#include <utility>

namespace LLVMBC
{
MDOperand::MDOperand(Module *parent_, MetadataKind kind_)
    : parent(parent_)
    , kind(kind_)
{
}

MDOperand::MDOperand(Module *parent_)
    : parent(parent_)
{
}

Module *MDOperand::getParent() const { return parent; }
MetadataKind MDOperand::get_metadata_kind() const { return kind; }

MDNode::MDNode(Module *module, Vector<MDOperand *> operands_)
    : MDOperand(module, MetadataKind::Node)
    , operands(std::move(operands_))
{
}

unsigned MDNode::getNumOperands() const { return unsigned(operands.size()); }

MDOperand &MDNode::getOperand(unsigned index) const
{
    assert(index < operands.size());
    return *operands[index];
}

uint64_t MDNode::get_tween_id() const { return tween; }
void MDNode::set_tween_id(uint64_t id) { tween = id; }

NamedMDNode::NamedMDNode(Module *module, String name_, Vector<MDNode *> operands_)
    : MDOperand(module, MetadataKind::NamedNode)
    , name(std::move(name_))
    , operands(std::move(operands_))
{
}

unsigned NamedMDNode::getNumOperands() const { return unsigned(operands.size()); }

MDNode *NamedMDNode::getOperand(unsigned index) const
{
    assert(index < operands.size());
    return operands[index];
}

const String &NamedMDNode::getName() const { return name; }

ConstantAsMetadata::ConstantAsMetadata(Module *module, Constant *value_)
    : MDOperand(module, MetadataKind::Constant)
    , value(value_)
{
}

Constant *ConstantAsMetadata::getValue() const { return value; }

MDString::MDString(LLVMBC::Module *module, String str_)
    : MDOperand(module, MetadataKind::String)
    , str(std::move(str_))
{
}

const String &MDString::getString() const { return str; }
} // namespace LLVMBC

================================================
FILE: bc/metadata.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#pragma once
#include "data_structures.hpp"

namespace LLVMBC
{
class Value;
class Module;
class Constant;

enum class MetadataKind
{
    NamedNode,
    Node,
    Constant,
    String,
    None
};

class MDOperand
{
public:
    explicit MDOperand(Module *parent);
    MDOperand(Module *parent, MetadataKind kind);
    Module *getParent() const;
    MetadataKind get_metadata_kind() const;

    explicit operator bool() const
    {
        return kind != MetadataKind::None;
    }

private:
    Module *parent;
    MetadataKind kind = MetadataKind::None;
};

class MDNode : public MDOperand
{
public:
    static constexpr MetadataKind get_metadata_kind() { return MetadataKind::Node; }
    MDNode(Module *module, Vector<MDOperand *> operands);
    MDOperand &getOperand(unsigned index) const;
    unsigned getNumOperands() const;
    void set_tween_id(uint64_t id);
    uint64_t get_tween_id() const;

private:
    Vector<MDOperand *> operands;
    uint64_t tween = 0;
};

class NamedMDNode : public MDOperand
{
public:
    static constexpr MetadataKind get_metadata_kind() { return MetadataKind::NamedNode; }
    NamedMDNode(Module *module, String name, Vector<MDNode *> operands);
    const String &getName() const;
    MDNode *getOperand(unsigned index) const;
    unsigned getNumOperands() const;

private:
    String name;
    Vector<MDNode *> operands;
};

class ConstantAsMetadata : public MDOperand
{
public:
    static constexpr MetadataKind get_metadata_kind() { return MetadataKind::Constant; }
    ConstantAsMetadata(Module *module, Constant *value);
    Constant *getValue() const;

private:
    Constant *value;
};

class MDString : public MDOperand
{
public:
    static constexpr MetadataKind get_metadata_kind() { return MetadataKind::String; }
    MDString(Module *module, String str);
    const String &getString() const;

private:
    String str;
};
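// Each operand class exposes a static get_metadata_kind() tag, mirroring the
// value hierarchy, so dyn_cast-style probing works on metadata too. Reading a
// string operand out of a node might look like (sketch; use_name is a
// hypothetical consumer):
//   if (auto *str = dyn_cast<MDString>(&node->getOperand(0)))
//       use_name(str->getString());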
} // namespace LLVMBC

================================================
FILE: bc/module.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "module.hpp"
#include "cast.hpp"
#include "context.hpp"
#include "function.hpp"
#include "instruction.hpp"
#include "logging.hpp"
#include "metadata.hpp"
#include "type.hpp"
#include "value.hpp"
#include <algorithm>

#include "llvm_decoder.h"

namespace LLVMBC
{
enum class KnownBlocks : uint32_t
{
    BLOCKINFO = 0,
    // 1-7 reserved,
    MODULE_BLOCK = 8,
    PARAMATTR_BLOCK = 9,
    PARAMATTR_GROUP_BLOCK = 10,
    CONSTANTS_BLOCK = 11,
    FUNCTION_BLOCK = 12,
    TYPE_SYMTAB_BLOCK = 13,
    VALUE_SYMTAB_BLOCK = 14,
    METADATA_BLOCK = 15,
    METADATA_ATTACHMENT = 16,
    TYPE_BLOCK = 17,
};

enum class AttributeRecord : uint32_t
{
    NONE = 0,
    ALIGNMENT = 1,
    BY_VAL = 3,
    STACK_ALIGNMENT = 25,
    DEREFERENCEABLE = 41,
    DEREFERENCEABLE_OR_NULL = 42,
    ALLOC_SIZE = 51
};

enum class ModuleRecord : uint32_t
{
    VERSION = 1,
    TRIPLE = 2,
    DATALAYOUT = 3,
    GLOBAL_VARIABLE = 7,
    FUNCTION = 8,
};

enum class ConstantsRecord : uint32_t
{
    SETTYPE = 1, CONST_NULL = 2, UNDEF = 3, INTEGER = 4, WIDE_INTEGER = 5,
    FLOAT = 6, AGGREGATE = 7, STRING = 8, BINOP = 10, CE_CAST = 11,
    GEP = 12, INBOUNDS_GEP = 20, DATA = 22, GEP_WITH_INRANGE_INDEX = 24
};

enum class FunctionRecord : uint32_t
{
    DECLAREBLOCKS = 1, INST_BINOP = 2, INST_CAST = 3, INST_GEP_OLD = 4,
    INST_SELECT = 5, INST_EXTRACTELT = 6, INST_INSERTELT = 7, INST_SHUFFLEVEC = 8,
    INST_CMP = 9, INST_RET = 10, INST_BR = 11, INST_SWITCH = 12, INST_INVOKE = 13,
    INST_UNREACHABLE = 15, INST_PHI = 16, INST_ALLOCA = 19, INST_LOAD = 20,
    INST_VAARG = 23, INST_STORE_OLD = 24, INST_EXTRACTVAL = 26, INST_INSERTVAL = 27,
    INST_CMP2 = 28, INST_VSELECT = 29, INST_INBOUNDS_GEP_OLD = 30, INST_INDIRECTBR = 31,
    DEBUG_LOC_AGAIN = 33, INST_CALL = 34, DEBUG_LOC = 35, INST_FENCE = 36,
    INST_CMPXCHG_OLD = 37, INST_ATOMICRMW = 38, INST_RESUME = 39, INST_LANDINGPAD_OLD = 40,
    INST_LOADATOMIC = 41, INST_STOREATOMIC_OLD = 42, INST_GEP = 43, INST_STORE = 44,
    INST_STOREATOMIC = 45, INST_CMPXCHG = 46, INST_LANDINGPAD = 47, INST_CLEANUPRET = 48,
    INST_CATCHRET = 49, INST_CATCHPAD = 50, INST_CLEANUPPAD = 51, INST_CATCHSWITCH = 52,
    OPERAND_BUNDLE = 55, INST_UNOP = 56, INST_CALLBR = 57,
};

enum class ValueSymtabRecord : uint32_t
{
    ENTRY = 1,
    BBENTRY = 2,
    FNENTRY = 3,
    COMBINED_ENTRY = 5,
};

enum class MetaDataRecord : uint32_t
{
    STRING_OLD = 1, VALUE = 2, NODE = 3, NAME = 4, DISTINCT_NODE = 5, KIND = 6,
    LOCATION = 7, OLD_NODE = 8, OLD_FN_NODE = 9, NAMED_NODE = 10, ATTACHMENT = 11,
    GENERIC_DEBUG = 12, SUBRANGE = 13, ENUMERATOR = 14, BASIC_TYPE = 15, FILE = 16,
    DERIVED_TYPE = 17, COMPOSITE_TYPE = 18, SUBROUTINE_TYPE = 19, COMPILE_UNIT = 20,
    SUBPROGRAM = 21, LEXICAL_BLOCK = 22, LEXICAL_BLOCK_FILE = 23, NAMESPACE = 24,
    TEMPLATE_TYPE = 25, TEMPLATE_VALUE = 26, GLOBAL_VAR = 27, LOCAL_VAR = 28,
    EXPRESSION = 29, OBJC_PROPERTY = 30, IMPORTED_ENTITY = 31, MODULE = 32,
    MACRO = 33, MACRO_FILE = 34, STRINGS = 35, GLOBAL_DECL_ATTACHMENT = 36,
    GLOBAL_VAR_EXPR = 37, INDEX_OFFSET = 38, INDEX = 39, LABEL = 40,
    COMMON_BLOCK = 44,
};
enum class AttributeCodes : uint32_t
{
    CodeEntryOld = 1,
    CodeEntry = 2,
    GroupCodeEntry = 3
};

enum class TypeRecord : uint32_t
{
    NUMENTRY = 1, VOID_TYPE = 2, FLOAT = 3, DOUBLE = 4, LABEL = 5, OPAQUE_TYPE = 6,
    INTEGER = 7, POINTER = 8, FUNCTION_OLD = 9, HALF = 10, ARRAY = 11, VECTOR = 12,
    METADATA = 16, STRUCT_ANON = 18, STRUCT_NAME = 19, STRUCT_NAMED = 20,
    FUNCTION = 21, TOKEN = 22,
};

enum class UnaryOp : uint32_t
{
    FNEG = 0
};

enum class BinOp : uint32_t
{
    ADD = 0, SUB = 1, MUL = 2, UDIV = 3, SDIV = 4, UREM = 5, SREM = 6,
    SHL = 7, LSHR = 8, ASHR = 9, AND = 10, OR = 11, XOR = 12
};

enum class AtomicBinOp : uint32_t
{
    RMW_XCHG = 0, RMW_ADD = 1, RMW_SUB = 2, RMW_AND = 3, RMW_NAND = 4,
    RMW_OR = 5, RMW_XOR = 6, RMW_MAX = 7, RMW_MIN = 8, RMW_UMAX = 9,
    RMW_UMIN = 10, RMW_FADD = 11, RMW_FSUB = 12
};

enum class CastOp : uint32_t
{
    TRUNC = 0, ZEXT = 1, SEXT = 2, FPTOUI = 3, FPTOSI = 4, UITOFP = 5,
    SITOFP = 6, FPTRUNC = 7, FPEXT = 8, PTRTOINT = 9, INTTOPTR = 10,
    BITCAST = 11, ADDSPACECAST = 12
};

enum CallFlagBits
{
    CALL_TAIL_BIT = 1 << 0,
    CALL_CCONV_BIT = 1 << 1,
    CALL_MUSTTAIL_BIT = 1 << 14,
    CALL_EXPLICIT_TYPE_BIT = 1 << 15,
    CALL_NOTAIL_BIT = 1 << 16,
    CALL_FMF_BIT = 1 << 17
};

enum FastMathFlagBits
{
    FAST_MATH_UNSAFE_ALGEBRA_BIT = 1 << 0,
    FAST_MATH_ALLOW_CONTRACT_BIT = 1 << 5
};

static int64_t decode_sign_rotated_value(uint64_t v)
{
    bool sign = (v & 1) != 0;
    v >>= 1;
    if (sign)
        v = v ? -int64_t(v) : (1ull << 63u);
    return int64_t(v);
}
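// The sign-rotated scheme stores x >= 0 as 2 * x and x < 0 as 2 * (-x) + 1,
// reserving the pattern with only the sign bit set for INT64_MIN. Examples:
//   decode_sign_rotated_value(6) == 3
//   decode_sign_rotated_value(7) == -3
//   decode_sign_rotated_value(1) == int64_t(1ull << 63)  // INT64_MIN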
struct ModuleParseContext
{
    Function *function = nullptr;
    Module *module = nullptr;
    LLVMContext *context = nullptr;
    Vector<BasicBlock *> basic_blocks;
    Vector<Value *> values;
    Vector<Instruction *> instructions;
    Vector<Type *> types;
    Vector<Function *> functions_with_bodies;
    UnorderedMap<uint64_t, MDOperand *> metadata;
    UnorderedMap<uint64_t, String> metadata_kind_map;
    Vector<Vector<std::pair<String, String>>> attribute_lists;
    UnorderedMap<uint64_t, Vector<std::pair<String, String>>> attribute_groups;
    Type *constant_type = nullptr;
    String current_metadata_name;

    bool parse_function_child_block(const BlockOrRecord &entry);
    bool parse_record(const BlockOrRecord &entry);
    bool parse_constants_record(const BlockOrRecord &entry);
    bool parse_constants_block(const BlockOrRecord &entry);
    bool parse_metadata_block(const BlockOrRecord &entry);
    bool parse_paramattr_block(const BlockOrRecord &entry);
    bool parse_paramattr_group_block(const BlockOrRecord &entry);
    bool parse_metadata_attachment_record(const BlockOrRecord &entry);
    bool parse_metadata_record(const BlockOrRecord &entry, unsigned index);
    Type *get_constant_type();
    bool parse_function_body(const BlockOrRecord &entry);
    bool parse_types(const BlockOrRecord &entry);
    bool parse_value_symtab(const BlockOrRecord &entry);
    bool parse_function_record(const BlockOrRecord &entry);
    bool parse_global_variable_record(const BlockOrRecord &entry);
    bool parse_version_record(const BlockOrRecord &entry);
    bool parse_type(const BlockOrRecord &entry);
    bool add_instruction(Instruction *inst);
    bool add_value(Value *value);
    bool add_type(Type *type);
    Type *get_type(uint64_t index);

    bool finish_basic_block();
    bool add_successor(BasicBlock *bb);
    BasicBlock *get_basic_block(uint64_t index) const;
    BasicBlock *current_bb = nullptr;
    unsigned basic_block_index = 0;

    Value *get_value(uint64_t op, Type *expected_type = nullptr, bool force_absolute = false);
    std::pair<Value *, Type *> get_value_and_type(const Vector<uint64_t> &ops, unsigned &index);
    Value *get_value(const Vector<uint64_t> &ops, unsigned &index, Type *expected_type);
    Value *get_value_signed(uint64_t op, Type *expected_type = nullptr);
    MDOperand *get_metadata(uint64_t index) const;
    const char *get_metadata_kind(uint64_t index) const;
    Instruction *get_instruction(uint64_t index) const;

    Vector<ValueProxy *> pending_forward_references;
    Vector<std::pair<GlobalVariable *, uint64_t>> global_initializations;
    bool resolve_forward_references();
    bool resolve_global_initializations();

    uint64_t tween_id = 1;
    uint64_t metadata_tween_id = 1;
    bool use_relative_id = true;
    bool use_strtab = false;
    bool seen_first_function_body = false;
};

ValueProxy::ValueProxy(Type *type, ModuleParseContext &context_, uint64_t id_)
    : Value(type, ValueKind::Proxy)
    , id(id_)
    , context(context_)
{
}

bool ValueProxy::resolve()
{
    if (proxy)
        return true;
    if (id >= context.values.size())
    {
        LOGE("Value proxy is out of range.\n");
        return false;
    }

    proxy = context.values[id];
    while (proxy && proxy->get_value_kind() == ValueKind::Proxy)
    {
        cast<ValueProxy>(proxy)->resolve();
        proxy = cast<ValueProxy>(proxy)->get_proxy_value();
    }

    if (!proxy)
    {
        LOGE("Failed to resolve proxy value.\n");
        return false;
    }
    return true;
}

Value *ValueProxy::get_proxy_value() const { return proxy; }

bool ModuleParseContext::finish_basic_block()
{
    basic_block_index++;
    if (basic_block_index >= basic_blocks.size())
        current_bb = nullptr;
    else
    {
        current_bb = basic_blocks[basic_block_index];
        current_bb->set_tween_id(tween_id++);
    }
    return true;
}

bool ModuleParseContext::add_successor(BasicBlock *bb)
{
    if (!current_bb)
    {
        LOGE("No basic block is active in add_successor().\n");
        return false;
    }
    current_bb->add_successor(bb);
    return true;
}

BasicBlock *ModuleParseContext::get_basic_block(uint64_t index) const
{
    if (index >= basic_blocks.size())
    {
        LOGE("Basic block index is out of bounds!\n");
        return nullptr;
    }
    return basic_blocks[index];
}

Value *ModuleParseContext::get_value(uint64_t op, Type *expected_type, bool force_absolute)
{
    if (!force_absolute && use_relative_id)
        op = uint32_t(values.size() - op);

    if (op >= values.size())
    {
        if (!expected_type)
        {
            LOGE("Must have an expected type for forward references!\n");
            return nullptr;
        }
        auto *proxy = context->construct<ValueProxy>(expected_type, *this, op);
        pending_forward_references.push_back(proxy);
        return proxy;
    }
    else
    {
        // A pointer to function and a constant function do match.
        if (auto *ptr_type = dyn_cast<PointerType>(expected_type))
            if (ptr_type->getPointerElementType()->getTypeID() == Type::TypeID::FunctionTyID)
                expected_type = ptr_type->getPointerElementType();

        if (expected_type && expected_type != values[op]->getType())
        {
            LOGE("Type mismatch.\n");
            return nullptr;
        }
        return values[op];
    }
}

Value *ModuleParseContext::get_value(const Vector<uint64_t> &ops, unsigned &index, Type *expected_type)
{
    if (index >= ops.size())
        return nullptr;
    return get_value(ops[index++], expected_type);
}

std::pair<Value *, Type *> ModuleParseContext::get_value_and_type(const Vector<uint64_t> &ops, unsigned &index)
{
    if (index >= ops.size())
        return {};

    uint64_t op = ops[index++];
    if (use_relative_id)
        op = uint32_t(values.size() - op);

    if (op < values.size())
    {
        // Normal reference.
        return { values[op], values[op]->getType() };
    }
    else
    {
        // Forward reference, the type is encoded in the next element.
        if (index >= ops.size())
            return {};
        auto *type = get_type(ops[index++]);
        auto *proxy = context->construct<ValueProxy>(type, *this, op);
        pending_forward_references.push_back(proxy);
        return { proxy, type };
    }
}

Instruction *ModuleParseContext::get_instruction(uint64_t index) const
{
    if (index >= instructions.size())
    {
        LOGE("Instruction index is out of range!\n");
        return nullptr;
    }
    return instructions[index];
}
MDOperand *ModuleParseContext::get_metadata(uint64_t index) const
{
    auto itr = metadata.find(index);
    if (itr != metadata.end())
        return itr->second;
    else
    {
        // Need to return a null-node like this since MDOperand is used as a
        // reference in the LLVM API for some reason.
        return context->construct<MDOperand>(module);
    }
}

const char *ModuleParseContext::get_metadata_kind(uint64_t index) const
{
    auto itr = metadata_kind_map.find(index);
    if (itr != metadata_kind_map.end())
        return itr->second.c_str();
    else
        return nullptr;
}

Value *ModuleParseContext::get_value_signed(uint64_t op, Type *expected_type)
{
    int64_t signed_op = decode_sign_rotated_value(op);
    if (use_relative_id)
        signed_op = values.size() - signed_op;
    op = signed_op;

    if (op >= values.size())
    {
        if (!expected_type)
        {
            LOGE("Must have an expected type for forward references!\n");
            return nullptr;
        }
        auto *proxy = context->construct<ValueProxy>(expected_type, *this, op);
        pending_forward_references.push_back(proxy);
        return proxy;
    }
    else
        return values[op];
}

bool ModuleParseContext::add_instruction(Instruction *inst)
{
    instructions.push_back(inst);
    if (current_bb)
        current_bb->add_instruction(inst);
    else
    {
        LOGE("No basic block is currently set!\n");
        return false;
    }

    if (inst->isTerminator())
        return finish_basic_block();
    else
        return add_value(inst);
}

bool ModuleParseContext::add_value(Value *value)
{
    if (value->getType()->getTypeID() != Type::TypeID::VoidTyID)
    {
        value->set_tween_id(tween_id++);
        values.push_back(value);
    }
    return true;
}

Type *ModuleParseContext::get_constant_type()
{
    if (constant_type)
        return constant_type;
    else
        return Type::getInt32Ty(*context);
}

static Type *resolve_gep_element_type(Type *type, const Vector<Value *> &args)
{
    for (unsigned i = 2; i < args.size(); i++)
    {
        auto *arg = args[i];
        if (type->getTypeID() == Type::TypeID::StructTyID)
        {
            auto *const_int = dyn_cast<ConstantInt>(arg);
            if (!const_int)
            {
                LOGE("Indexing into a struct without a constant integer.\n");
                return nullptr;
            }

            unsigned index = const_int->getUniqueInteger().getZExtValue();
            if (index >= cast<StructType>(type)->getNumElements())
            {
                LOGE("Struct element index out of range.\n");
                return nullptr;
            }
            type = cast<StructType>(type)->getElementType(index);
        }
        else if (type->getTypeID() == Type::TypeID::ArrayTyID)
        {
            type = type->getArrayElementType();
        }
        else if (type->getTypeID() == Type::TypeID::VectorTyID)
        {
            type = cast<VectorType>(type)->getElementType();
        }
        else
            return nullptr;
    }
    return type;
}
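// resolve_gep_element_type peels one type layer per index beyond the first two
// operands (base pointer and initial index). For instance, a GEP into a
// { [4 x float], i32 }* with indices (0, 0, i) walks
// struct -> [4 x float] -> float, and the caller then wraps the result back
// into a float* carrying the original address space.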
static BinaryOperator::BinaryOps translate_binop(BinOp op, Type *type)
{
    bool is_fp = type->isFloatingPointTy();
    switch (op)
    {
    case BinOp::ADD:
        return is_fp ? BinaryOperator::BinaryOps::FAdd : BinaryOperator::BinaryOps::Add;
    case BinOp::SUB:
        return is_fp ? BinaryOperator::BinaryOps::FSub : BinaryOperator::BinaryOps::Sub;
    case BinOp::MUL:
        return is_fp ? BinaryOperator::BinaryOps::FMul : BinaryOperator::BinaryOps::Mul;
    case BinOp::UDIV:
        return is_fp ? BinaryOperator::BinaryOps::InvalidBinaryOp : BinaryOperator::BinaryOps::UDiv;
    case BinOp::SDIV:
        return is_fp ? BinaryOperator::BinaryOps::FDiv : BinaryOperator::BinaryOps::SDiv;
    case BinOp::UREM:
        return is_fp ? BinaryOperator::BinaryOps::InvalidBinaryOp : BinaryOperator::BinaryOps::URem;
    case BinOp::SREM:
        return is_fp ? BinaryOperator::BinaryOps::FRem : BinaryOperator::BinaryOps::SRem;
    case BinOp::SHL:
        return is_fp ? BinaryOperator::BinaryOps::InvalidBinaryOp : BinaryOperator::BinaryOps::Shl;
    case BinOp::LSHR:
        return is_fp ? BinaryOperator::BinaryOps::InvalidBinaryOp : BinaryOperator::BinaryOps::LShr;
    case BinOp::ASHR:
        return is_fp ? BinaryOperator::BinaryOps::InvalidBinaryOp : BinaryOperator::BinaryOps::AShr;
    case BinOp::AND:
        return is_fp ? BinaryOperator::BinaryOps::InvalidBinaryOp : BinaryOperator::BinaryOps::And;
    case BinOp::OR:
        return is_fp ? BinaryOperator::BinaryOps::InvalidBinaryOp : BinaryOperator::BinaryOps::Or;
    case BinOp::XOR:
        return is_fp ? BinaryOperator::BinaryOps::InvalidBinaryOp : BinaryOperator::BinaryOps::Xor;
    default:
        return BinaryOperator::BinaryOps::InvalidBinaryOp;
    }
}

static Instruction::CastOps translate_castop(CastOp op)
{
    switch (op)
    {
    case CastOp::TRUNC: return Instruction::Trunc;
    case CastOp::ZEXT: return Instruction::ZExt;
    case CastOp::SEXT: return Instruction::SExt;
    case CastOp::FPTOUI: return Instruction::FPToUI;
    case CastOp::FPTOSI: return Instruction::FPToSI;
    case CastOp::UITOFP: return Instruction::UIToFP;
    case CastOp::SITOFP: return Instruction::SIToFP;
    case CastOp::FPTRUNC: return Instruction::FPTrunc;
    case CastOp::FPEXT: return Instruction::FPExt;
    case CastOp::PTRTOINT: return Instruction::PtrToInt;
    case CastOp::INTTOPTR: return Instruction::IntToPtr;
    case CastOp::BITCAST: return Instruction::BitCast;
    case CastOp::ADDSPACECAST: return Instruction::AddrSpaceCast;
    }
    return Instruction::CastOps::InvalidCastOp;
}

bool ModuleParseContext::parse_constants_record(const BlockOrRecord &entry)
{
    if (entry.IsBlock())
        return true;

    switch (ConstantsRecord(entry.id))
    {
    case ConstantsRecord::SETTYPE:
        if (entry.ops.size() < 1)
            return false;
        constant_type = get_type(entry.ops[0]);
        break;

    case ConstantsRecord::CONST_NULL:
    {
        auto *type = get_constant_type();
        Value *value = nullptr;
        if (type->isIntegerTy())
            value = ConstantInt::get(type, 0);
        else if (type->isFloatingPointTy())
            value = ConstantFP::get(type, 0);
        else if (isa<ArrayType>(type) || isa<StructType>(type) || isa<VectorType>(type))
            value = context->construct<ConstantAggregateZero>(type);
        else if (isa<PointerType>(type))
            value = context->construct<ConstantPointerNull>(type);

        if (!value)
        {
            LOGE("Unknown type for CONST_NULL.\n");
            return false;
        }
        values.push_back(value);
        break;
    }

    case ConstantsRecord::UNDEF:
    {
        auto *type = get_constant_type();
        values.push_back(UndefValue::get(type));
        break;
    }

    case ConstantsRecord::INTEGER:
    {
        if (entry.ops.size() < 1)
            return false;
        auto *type = get_constant_type();
        if (!type->isIntegerTy())
        {
            LOGE("Constant type is not integer.\n");
            return false;
        }
        uint64_t literal = entry.ops[0];
        int64_t signed_literal = decode_sign_rotated_value(literal);
        ConstantInt *value = ConstantInt::get(type, signed_literal);
        values.push_back(value);
        break;
    }

    case ConstantsRecord::WIDE_INTEGER:
        LOGE("WIDE_INTEGER unimplemented.\n");
        return false;

    case ConstantsRecord::FLOAT:
    {
        if (entry.ops.size() < 1)
            return false;
        auto *type = get_constant_type();
        if (!type->isFloatingPointTy())
        {
            LOGE("Constant type is not FP.\n");
            return false;
        }
        ConstantFP *value = ConstantFP::get(type, entry.ops[0]);
        values.push_back(value);
        break;
    }
    case ConstantsRecord::AGGREGATE:
    {
        Vector<Value *> constants;
        Value *value;
        constants.reserve(entry.ops.size());

        if (auto *struct_type = dyn_cast<StructType>(get_constant_type()))
        {
            if (entry.ops.size() != struct_type->getStructNumElements())
            {
                LOGE("Mismatch in struct element counts.\n");
                return false;
            }
            for (unsigned i = 0; i < struct_type->getStructNumElements(); i++)
                constants.push_back(get_value(entry.ops[i], struct_type->getStructElementType(i), true));
            value = context->construct<ConstantAggregate>(get_constant_type(), std::move(constants));
        }
        else if (isa<ArrayType>(get_constant_type()))
        {
            auto *element_type = get_constant_type()->getArrayElementType();
            for (auto &op : entry.ops)
                constants.push_back(get_value(op, element_type, true));
            value = context->construct<ConstantAggregate>(get_constant_type(), std::move(constants));
        }
        else if (isa<VectorType>(get_constant_type()))
        {
            auto *element_type = cast<VectorType>(get_constant_type())->getElementType();
            for (auto &op : entry.ops)
                constants.push_back(get_value(op, element_type, true));
            value = context->construct<ConstantAggregate>(get_constant_type(), std::move(constants));
        }
        else
        {
            value = UndefValue::get(get_constant_type());
        }
        values.push_back(value);
        break;
    }

    case ConstantsRecord::STRING:
        LOGE("STRING unimplemented.\n");
        return false;

    case ConstantsRecord::BINOP:
    {
        unsigned index = 0;
        auto *type = get_constant_type();
        auto op = translate_binop(BinOp(entry.ops[index++]), type);
        auto *a = get_value(entry.ops[index++], type, true);
        auto *b = get_value(entry.ops[index++], type, true);
        auto elements = Vector<Value *>{ a, b };
        Value *value = context->construct<ConstantExpr>(op, type, std::move(elements));
        values.push_back(value);
        break;
    }

    case ConstantsRecord::CE_CAST:
    {
        unsigned index = 0;
        auto op = translate_castop(CastOp(entry.ops[index++]));
        auto *type = get_constant_type();
        auto *input_value_type = get_type(entry.ops[index++]);
        if (!input_value_type)
            return false;
        auto *input_value = get_value(entry.ops[index++], input_value_type, true);
        auto elements = Vector<Value *>{ input_value };
        Value *value = context->construct<ConstantExpr>(op, type, std::move(elements));
        values.push_back(value);
        break;
    }

    case ConstantsRecord::DATA:
    {
        bool is_vector = false;
        Type *element_type = nullptr;
        if (isa<ArrayType>(get_constant_type()))
            element_type = get_constant_type()->getArrayElementType();
        else if (isa<VectorType>(get_constant_type()))
        {
            element_type = cast<VectorType>(get_constant_type())->getElementType();
            is_vector = true;
        }
        else
        {
            LOGE("Unknown DATA type.\n");
            return false;
        }

        bool is_fp = element_type->isFloatingPointTy();
        bool is_int = element_type->isIntegerTy();

        Vector<Constant *> constants;
        constants.reserve(entry.ops.size());
        if (is_fp)
        {
            for (auto &op : entry.ops)
                constants.push_back(ConstantFP::get(element_type, op));
        }
        else if (is_int)
        {
            for (auto &op : entry.ops)
                constants.push_back(ConstantInt::get(element_type, op));
        }
        else
        {
            LOGE("Unknown DATA type.\n");
            return false;
        }

        Value *value;
        if (is_vector)
            value = context->construct<ConstantDataVector>(get_constant_type(), std::move(constants));
        else
            value = context->construct<ConstantDataArray>(get_constant_type(), std::move(constants));
        values.push_back(value);
        break;
    }

    case ConstantsRecord::GEP:
    case ConstantsRecord::INBOUNDS_GEP:
    case ConstantsRecord::GEP_WITH_INRANGE_INDEX:
    {
        if (entry.ops.size() < 2)
            return false;

        Type *pointee_type = nullptr;
        unsigned index = 0;
        if (ConstantsRecord(entry.id) == ConstantsRecord::GEP_WITH_INRANGE_INDEX || (entry.ops.size() & 1))
            pointee_type = get_type(entry.ops[index++]);
        if (ConstantsRecord(entry.id) == ConstantsRecord::GEP_WITH_INRANGE_INDEX)
            index++;

        Vector<Value *> elements;
        elements.reserve(entry.ops.size() / 2);
        while (index < entry.ops.size())
        {
            auto *type = get_type(entry.ops[index++]);
            auto *value = get_value(entry.ops[index++], type, true);
            elements.push_back(value);
        }

        if (elements.size() < 2)
            return false;
        if (!pointee_type)
            pointee_type = elements[0]->getType()->getPointerElementType();
        pointee_type = resolve_gep_element_type(pointee_type, elements);
        if (!pointee_type)
            return false;
        pointee_type = PointerType::get(pointee_type, cast<PointerType>(elements[0]->getType())->getAddressSpace());

        auto *value = context->construct<ConstantExpr>(Instruction::GetElementPtr, pointee_type, std::move(elements));
        values.push_back(value);
        break;
    }

    default:
        LOGE("UNKNOWN unimplemented.\n");
        return false;
    }
    return true;
}

bool ModuleParseContext::parse_constants_block(const BlockOrRecord &entry)
{
    constant_type = nullptr;
    for (auto &child : entry.children)
        if (!parse_constants_record(child))
            return false;
    return true;
}
bool ModuleParseContext::parse_metadata_attachment_record(const BlockOrRecord &entry)
{
    if (MetaDataRecord(entry.id) != MetaDataRecord::ATTACHMENT)
        return true;
    if (entry.ops.size() < 1)
        return false;

    size_t size = entry.ops.size();
    size_t num_nodes = (size - 1) / 2;
    auto *inst = get_instruction(entry.ops[0]);
    if (!inst)
    {
        LOGE("Invalid instruction.\n");
        return false;
    }

    for (size_t i = 0; i < num_nodes; i++)
    {
        auto *kind = get_metadata_kind(entry.ops[2 * i + 1]);
        auto *operand = get_metadata(entry.ops[2 * i + 2]);
        auto *node = dyn_cast<MDNode>(operand);
        if (!kind)
        {
            LOGE("Invalid metadata kind.\n");
            return false;
        }
        if (!node)
        {
            LOGE("Invalid metadata attachment.\n");
            return false;
        }
        inst->setMetadata(kind, node);
    }
    return true;
}

bool ModuleParseContext::parse_metadata_record(const BlockOrRecord &entry, unsigned index)
{
    switch (MetaDataRecord(entry.id))
    {
    case MetaDataRecord::NAME:
        current_metadata_name = entry.getString();
        break;

    case MetaDataRecord::NAMED_NODE:
    {
        Vector<MDNode *> ops;
        ops.reserve(entry.ops.size());
        for (auto &op : entry.ops)
        {
            auto *md = get_metadata(op);
            auto *node = dyn_cast<MDNode>(md);
            ops.push_back(node);
        }
        auto *node = context->construct<NamedMDNode>(module, current_metadata_name, std::move(ops));
        module->add_named_metadata(current_metadata_name, node);
        metadata[index] = node;
        break;
    }

    case MetaDataRecord::DISTINCT_NODE:
    case MetaDataRecord::NODE:
    {
        Vector<MDOperand *> ops;
        ops.reserve(entry.ops.size());
        for (auto &op : entry.ops)
        {
            // For some reason, here metadata is indexed with -1?
            auto *md = get_metadata(op - 1);
            ops.push_back(md);
        }
        auto *node = context->construct<MDNode>(module, std::move(ops));
        node->set_tween_id(metadata_tween_id++);
        module->add_unnamed_metadata(node);
        metadata[index] = node;
        break;
    }

    case MetaDataRecord::STRING_OLD:
    {
        auto *node = context->construct<MDString>(module, entry.getString());
        metadata[index] = node;
        break;
    }

    case MetaDataRecord::VALUE:
    {
        if (entry.ops.size() < 2)
            return false;
        auto *value = get_value(entry.ops[1], nullptr, true);
        if (!value)
        {
            LOGE("Null value!\n");
            return false;
        }
        auto *constant_value = dyn_cast<Constant>(value);
        if (!constant_value)
        {
            LOGE("Not a constant!\n");
            return false;
        }
        auto *node = context->construct<ConstantAsMetadata>(module, constant_value);
        metadata[index] = node;
        break;
    }

    case MetaDataRecord::KIND:
    {
        if (entry.ops.size() < 1)
            return false;
        metadata_kind_map[entry.ops[0]] = entry.getString(1);
        break;
    }

    default:
        break;
    }
    return true;
}

bool ModuleParseContext::parse_metadata_block(const BlockOrRecord &entry)
{
    unsigned index = 0;
    for (auto &child : entry.children)
        if (!parse_metadata_record(child, index++))
            return false;
    return true;
}
bool ModuleParseContext::parse_paramattr_block(const BlockOrRecord &entry)
{
    for (auto &child : entry.children)
    {
        if (!child.IsRecord())
            continue;
        // Don't support the OLD variant unless we observe it in the wild.
        // DXC doesn't generate it.
        if (AttributeCodes(child.id) != AttributeCodes::CodeEntry)
            return false;

        Vector<std::pair<String, String>> pairs;
        for (auto op : child.ops)
        {
            auto &grp = attribute_groups[op];
            for (auto &elem : grp)
                pairs.push_back(elem);
        }
        attribute_lists.push_back(std::move(pairs));
    }
    return true;
}

bool ModuleParseContext::parse_paramattr_group_block(const BlockOrRecord &entry)
{
    if (!attribute_groups.empty())
    {
        LOGE("Cannot use multiple group blocks.\n");
        return false;
    }

    for (auto &child : entry.children)
    {
        if (!child.IsRecord())
            continue;
        if (AttributeCodes(child.id) != AttributeCodes::GroupCodeEntry)
            continue;
        if (child.ops.size() < 3)
            return false;

        uint64_t group_id = child.ops[0];
        uint64_t index = child.ops[1];
        if (index != ~0u)
        {
            // Only care about attributes on function scope
            continue;
        }

        auto &attr_group = attribute_groups[group_id];

        size_t i = 2;
        size_t count = child.ops.size();
        while (i < count)
        {
            if (child.ops[i] == 0)
            {
                // Enum attribute, skip 2 values
                i += 2;
            }
            else if (child.ops[i] == 1)
            {
                // Integer attribute, skip 2 or 3 values
                i++;
                if (i >= count)
                    return false;
                switch (AttributeRecord(child.ops[i++]))
                {
                case AttributeRecord::ALIGNMENT:
                case AttributeRecord::STACK_ALIGNMENT:
                case AttributeRecord::ALLOC_SIZE:
                case AttributeRecord::DEREFERENCEABLE:
                case AttributeRecord::DEREFERENCEABLE_OR_NULL:
                    i++;
                    break;
                default:
                    break;
                }
            }
            else if (child.ops[i] == 3 || child.ops[i] == 4)
            {
                // String attribute
                bool has_value = child.ops[i++] == 4;
                String kind, value;

                // Bounds-check before dereferencing the terminator lookup.
                while (i < count && child.ops[i] != 0)
                    kind.push_back(char(child.ops[i++]));
                if (i >= count || child.ops[i] != 0)
                    return false;
                i++;

                if (has_value)
                {
                    while (i < count && child.ops[i] != 0)
                        value.push_back(char(child.ops[i++]));
                    if (i >= count || child.ops[i] != 0)
                        return false;
                    i++;
                }

                attr_group.emplace_back(std::move(kind), std::move(value));
            }
            else if (child.ops[i] == 5 || child.ops[i] == 6)
            {
                // Value attribute
                bool has_type = child.ops[i++] == 6;
                if (i >= count)
                    return false;
                if (AttributeRecord(child.ops[i++]) == AttributeRecord::BY_VAL && has_type)
                    i++;
            }
            else
                return false;
        }

        if (i > count)
            return false;
    }
    return true;
}

bool ModuleParseContext::parse_function_child_block(const BlockOrRecord &entry)
{
    switch (KnownBlocks(entry.id))
    {
    case KnownBlocks::CONSTANTS_BLOCK:
    {
        for (auto &child : entry.children)
            if (!parse_constants_record(child))
                return false;
        break;
    }

    case KnownBlocks::METADATA_ATTACHMENT:
    {
        for (auto &child : entry.children)
            if (!parse_metadata_attachment_record(child))
                return false;
        break;
    }

    default:
        break;
    }
    return true;
}

static UnaryOperator::UnaryOps translate_uop(UnaryOp op, Type *type)
{
    bool is_fp = type->isFloatingPointTy();
    if (op == UnaryOp::FNEG && is_fp)
        return UnaryOperator::UnaryOps::FNeg;
    else
        return UnaryOperator::UnaryOps::Invalid;
}
static AtomicRMWInst::BinOp translate_atomic_binop(AtomicBinOp op)
{
    switch (op)
    {
    case AtomicBinOp::RMW_XCHG: return AtomicRMWInst::BinOp::Xchg;
    case AtomicBinOp::RMW_ADD: return AtomicRMWInst::BinOp::Add;
    case AtomicBinOp::RMW_SUB: return AtomicRMWInst::BinOp::Sub;
    case AtomicBinOp::RMW_AND: return AtomicRMWInst::BinOp::And;
    case AtomicBinOp::RMW_NAND: return AtomicRMWInst::BinOp::Nand;
    case AtomicBinOp::RMW_OR: return AtomicRMWInst::BinOp::Or;
    case AtomicBinOp::RMW_XOR: return AtomicRMWInst::BinOp::Xor;
    case AtomicBinOp::RMW_MAX: return AtomicRMWInst::BinOp::Max;
    case AtomicBinOp::RMW_MIN: return AtomicRMWInst::BinOp::Min;
    case AtomicBinOp::RMW_UMAX: return AtomicRMWInst::BinOp::UMax;
    case AtomicBinOp::RMW_UMIN: return AtomicRMWInst::BinOp::UMin;
    case AtomicBinOp::RMW_FADD: return AtomicRMWInst::BinOp::FAdd;
    case AtomicBinOp::RMW_FSUB: return AtomicRMWInst::BinOp::FSub;
    default: return AtomicRMWInst::BinOp::Invalid;
    }
}

bool ModuleParseContext::parse_record(const BlockOrRecord &entry)
{
    switch (FunctionRecord(entry.id))
    {
    case FunctionRecord::DECLAREBLOCKS:
    {
        if (entry.ops.size() < 1)
            return false;
        basic_blocks.resize(entry.ops[0]);
        basic_block_index = 0;
        for (auto &bb : basic_blocks)
            bb = context->construct<BasicBlock>(*context);
        current_bb = basic_blocks.front();
        break;
    }

    case FunctionRecord::INST_CALL:
    {
        unsigned index = 1;
        if (index >= entry.ops.size())
            return false;
        auto CCInfo = entry.ops[index++];

        if (CCInfo & CALL_FMF_BIT)
        {
            if (index >= entry.ops.size())
                return false;
            auto fmf = entry.ops[index++];
            (void)fmf;
        }

        FunctionType *function_type = nullptr;
        if (CCInfo & CALL_EXPLICIT_TYPE_BIT)
        {
            if (index >= entry.ops.size())
                return false;
            function_type = cast<FunctionType>(get_type(entry.ops[index++]));
        }

        if (index >= entry.ops.size())
            return false;
        auto *callee = dyn_cast<Function>(get_value(entry.ops[index++]));
        if (!callee)
            return false;
        if (!function_type)
            function_type = callee->getFunctionType();
        if (!function_type)
            return false;

        unsigned num_params = function_type->getNumParams();
        if (entry.ops.size() != index + num_params)
        {
            LOGE("Number of params does not match record.\n");
            return false;
        }

        Vector<Value *> params;
        params.reserve(num_params);
        for (unsigned i = 0; i < num_params; i++)
        {
            auto *arg = get_value(entry.ops[index + i], function_type->getParamType(i));
            if (!arg)
                return false;
            params.push_back(arg);
        }

        auto *value = context->construct<CallInst>(function_type, callee, std::move(params));
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_RET:
    {
        Value *return_val = !entry.ops.empty() ? get_value(entry.ops[0]) : nullptr;
        auto *ret = context->construct<ReturnInst>(return_val);
        if (!add_instruction(ret))
            return false;
        break;
    }

    case FunctionRecord::INST_UNREACHABLE:
    {
        auto *ret = context->construct<UnreachableInst>();
        if (!add_instruction(ret))
            return false;
        break;
    }

    case FunctionRecord::INST_UNOP:
    {
        unsigned index = 0;
        auto val = get_value_and_type(entry.ops, index);
        if (!val.first)
            return false;
        if (index == entry.ops.size())
            return false;
        auto op = UnaryOp(entry.ops[index++]);
        auto *value = context->construct<UnaryOperator>(translate_uop(op, val.second), val.first);
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_CMP:
    case FunctionRecord::INST_CMP2:
    {
        unsigned index = 0;
        auto lhs = get_value_and_type(entry.ops, index);
        if (!lhs.first)
            return false;
        auto *rhs = get_value(entry.ops, index, lhs.second);
        if (index == entry.ops.size())
            return false;
        auto pred = Instruction::Predicate(entry.ops[index++]);
        if (!rhs)
            return false;

        Instruction *value = nullptr;
        if (lhs.second->isFloatingPointTy())
            value = context->construct<FCmpInst>(pred, lhs.first, rhs);
        else
            value = context->construct<ICmpInst>(pred, lhs.first, rhs);
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_PHI:
    {
        if (entry.ops.size() < 1)
            return false;
        auto *type = get_type(entry.ops[0]);
        size_t num_args = (entry.ops.size() - 1) / 2;
        auto *phi_node = context->construct<PHINode>(type, num_args);

        for (size_t i = 0; i < num_args; i++)
        {
            Value *value = nullptr;
            if (use_relative_id)
                value = get_value_signed(entry.ops[2 * i + 1], type);
            else
                value = get_value(entry.ops[2 * i + 1], type);
            BasicBlock *bb = get_basic_block(entry.ops[2 * i + 2]);
            if (!value || !bb)
                return false;
            phi_node->add_incoming(value, bb);
        }
        if (!add_instruction(phi_node))
            return false;
        break;
    }
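    // Operand IDs in function records are normally relative: the stored number
    // is subtracted from values.size() at decode time, so 1 means "the most
    // recently defined value". PHI incoming values use the sign-rotated form
    // instead (see get_value_signed), since they may legitimately reference
    // values defined later in the function, e.g. across a loop back-edge.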
    case FunctionRecord::INST_BINOP:
    {
        unsigned index = 0;
        auto lhs = get_value_and_type(entry.ops, index);
        if (!lhs.first)
            return false;
        auto *rhs = get_value(entry.ops, index, lhs.second);
        if (!lhs.first || !rhs)
            return false;
        if (index == entry.ops.size())
            return false;
        auto op = BinOp(entry.ops[index++]);
        auto *value = context->construct<BinaryOperator>(lhs.first, rhs, translate_binop(op, lhs.second));

        if (index < entry.ops.size())
        {
            // Only relevant for FP math, but we only look at fast math state for
            // FP operations anyways.
            auto fast_math_flags = entry.ops[index];
            bool fast = (fast_math_flags & (FAST_MATH_UNSAFE_ALGEBRA_BIT | FAST_MATH_ALLOW_CONTRACT_BIT)) != 0;
            value->setFast(fast);
        }
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_ATOMICRMW:
    {
        unsigned index = 0;
        auto ptr = get_value_and_type(entry.ops, index);
        if (!ptr.first || !isa<PointerType>(ptr.second))
            return false;
        auto *val = get_value(entry.ops, index, ptr.second->getPointerElementType());
        if (!val)
            return false;
        if (index == entry.ops.size())
            return false;
        AtomicRMWInst::BinOp op = translate_atomic_binop(AtomicBinOp(entry.ops[index++]));
        auto *value = context->construct<AtomicRMWInst>(val->getType(), ptr.first, val, op);
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_CMPXCHG:
    {
        unsigned index = 0;
        auto ptr = get_value_and_type(entry.ops, index);
        auto cmp = get_value_and_type(entry.ops, index);
        if (!ptr.first || !cmp.first || !isa<PointerType>(ptr.second))
            return false;
        auto *new_value = get_value(entry.ops, index, cmp.second);
        auto *value = context->construct<AtomicCmpXchgInst>(ptr.first, cmp.first, new_value);
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_CAST:
    {
        unsigned index = 0;
        auto input_value = get_value_and_type(entry.ops, index);
        if (!input_value.first || index + 2 > entry.ops.size())
            return false;
        auto *type = get_type(entry.ops[index++]);
        if (!type)
            return false;
        auto op = Instruction::CastOps(translate_castop(CastOp(entry.ops[index++])));
        auto *value = context->construct<CastInst>(type, input_value.first, op);
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_SELECT:
    case FunctionRecord::INST_VSELECT:
    {
        unsigned index = 0;
        auto true_value = get_value_and_type(entry.ops, index);
        if (!true_value.first || index + 2 > entry.ops.size())
            return false;
        auto *false_value = get_value(entry.ops[index++], true_value.second);
        auto *cond_value = get_value(entry.ops[index++], Type::getInt1Ty(*context));
        if (!false_value || !cond_value)
            return false;
        auto *value = context->construct<SelectInst>(true_value.first, false_value, cond_value);
        if (!add_instruction(value))
            return false;
        break;
    }
    case FunctionRecord::INST_EXTRACTVAL:
    {
        unsigned index = 0;
        auto aggregate = get_value_and_type(entry.ops, index);
        if (!aggregate.first)
            return false;
        if (index == entry.ops.size())
            return false;

        Vector<unsigned> indices;
        indices.reserve(entry.ops.size() - index);
        unsigned num_args = entry.ops.size();
        Type *type = aggregate.second;

        for (; index < num_args; index++)
        {
            auto element = unsigned(entry.ops[index]);
            if (type->getTypeID() == Type::TypeID::StructTyID)
            {
                if (element >= cast<StructType>(type)->getNumElements())
                {
                    LOGE("Struct element index out of range.\n");
                    return false;
                }
                type = cast<StructType>(type)->getElementType(element);
            }
            else if (type->getTypeID() == Type::TypeID::ArrayTyID)
            {
                type = type->getArrayElementType();
            }
            else if (type->getTypeID() == Type::TypeID::VectorTyID)
            {
                type = cast<VectorType>(type)->getElementType();
            }
            else
            {
                // DXIL does not support vectors, so we're not supposed to index into them any further.
                return false;
            }
            indices.push_back(element);
        }

        auto *value = context->construct<ExtractValueInst>(type, aggregate.first, std::move(indices));
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_BR:
    {
        if (entry.ops.size() < 1)
            return false;
        auto *true_block = get_basic_block(entry.ops[0]);
        if (!true_block)
            return false;
        if (!add_successor(true_block))
            return false;

        if (entry.ops.size() == 1)
        {
            auto *value = context->construct<BranchInst>(true_block);
            if (!add_instruction(value))
                return false;
        }
        else if (entry.ops.size() == 3)
        {
            auto *false_block = get_basic_block(entry.ops[1]);
            if (!false_block)
                return false;
            if (!add_successor(false_block))
                return false;
            auto *cond = get_value(entry.ops[2], Type::getInt1Ty(*context));
            auto *value = context->construct<BranchInst>(true_block, false_block, cond);
            if (!add_instruction(value))
                return false;
        }
        else
            return false;
        break;
    }

    case FunctionRecord::INST_SWITCH:
    {
        if (entry.ops.size() < 3)
            return false;
        auto *type = get_type(entry.ops[0]);
        auto *cond = get_value(entry.ops[1]);
        auto *default_block = get_basic_block(entry.ops[2]);
        if (!type || !cond || !default_block)
            return false;
        if (!add_successor(default_block))
            return false;

        unsigned num_cases = (entry.ops.size() - 3) / 2;
        auto *inst = context->construct<SwitchInst>(cond, default_block, num_cases);

        for (unsigned i = 0; i < num_cases; i++)
        {
            // For some reason, case values are encoded in absolute terms.
            auto *case_value = get_value(entry.ops[3 + 2 * i], type, true);
            BasicBlock *bb = get_basic_block(entry.ops[4 + 2 * i]);
            if (!case_value || !bb)
            {
                LOGE("Invalid switch record.\n");
                return false;
            }
            if (!add_successor(bb))
                return false;
            inst->addCase(case_value, bb);
        }
        if (!add_instruction(inst))
            return false;
        break;
    }

    case FunctionRecord::INST_ALLOCA:
    {
        if (entry.ops.size() < 3)
            return false;
        auto *allocated_type = get_type(entry.ops[0]);
        auto *type = get_type(entry.ops[1]);
        auto *size = get_value(entry.ops[2], nullptr, true);
        if (!allocated_type || !type || !size)
            return false;
        auto *ptr_type = PointerType::get(allocated_type, 0);
        auto *value = context->construct<AllocaInst>(ptr_type, type, size);
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_GEP:
    {
        if (entry.ops.size() < 3)
            return false;
        bool inbounds = entry.ops[0] != 0;
        auto *type = get_type(entry.ops[1]);

        unsigned count = entry.ops.size();
        Vector<Value *> args;
        args.reserve(count);
        for (unsigned i = 2; i < count;)
        {
            auto value = get_value_and_type(entry.ops, i);
            if (!value.first)
                return false;
            args.push_back(value.first);
        }

        type = resolve_gep_element_type(type, args);
        if (!type)
            return false;
        type = PointerType::get(type, cast<PointerType>(args[0]->getType())->getAddressSpace());

        auto *value = context->construct<GetElementPtrInst>(type, std::move(args), inbounds);
        if (!add_instruction(value))
            return false;
        break;
    }

    case FunctionRecord::INST_LOAD:
    {
        unsigned index = 0;
        auto ptr = get_value_and_type(entry.ops, index);
        if (index + 2 != entry.ops.size() && index + 3 != entry.ops.size())
            return false;
        if (!ptr.first || !isa<PointerType>(ptr.second))
        {
            LOGE("Loading from something that is not a pointer.\n");
            return false;
        }

        Type *loaded_type = nullptr;
        if (index + 3 == entry.ops.size())
            loaded_type = get_type(entry.ops[index++]);
        else
            loaded_type = cast<PointerType>(ptr.second)->getElementType();

        auto *value = context->construct<LoadInst>(loaded_type, ptr.first);
        if (!add_instruction(value))
            return false;
        break;
    }
case FunctionRecord::INST_LOAD:
{
	unsigned index = 0;
	auto ptr = get_value_and_type(entry.ops, index);
	if (index + 2 != entry.ops.size() && index + 3 != entry.ops.size())
		return false;
	if (!ptr.first || !isa<PointerType>(ptr.second))
	{
		LOGE("Loading from something that is not a pointer.\n");
		return false;
	}

	Type *loaded_type = nullptr;
	if (index + 3 == entry.ops.size())
		loaded_type = get_type(entry.ops[index++]);
	else
		loaded_type = cast<PointerType>(ptr.second)->getElementType();

	auto *value = context->construct<LoadInst>(loaded_type, ptr.first);
	if (!add_instruction(value))
		return false;
	break;
}

case FunctionRecord::INST_STORE:
{
	unsigned index = 0;
	auto ptr = get_value_and_type(entry.ops, index);
	auto val = get_value_and_type(entry.ops, index);
	if (!ptr.first || !val.first || index + 2 != entry.ops.size())
		return false;
	auto *value = context->construct<StoreInst>(ptr.first, val.first);
	if (!add_instruction(value))
		return false;
	break;
}

case FunctionRecord::INST_SHUFFLEVEC:
{
	unsigned index = 0;
	auto a = get_value_and_type(entry.ops, index);
	auto *b = get_value(entry.ops, index, a.second);
	auto shuf = get_value_and_type(entry.ops, index);
	if (!a.first || !b || !shuf.first || !isa<VectorType>(a.second))
		return false;
	auto *vec_type = VectorType::get(cast<VectorType>(shuf.second)->getNumElements(),
	                                 cast<VectorType>(a.second)->getElementType());
	auto *value = context->construct<ShuffleVectorInst>(vec_type, a.first, b, shuf.first);
	if (!add_instruction(value))
		return false;
	break;
}

case FunctionRecord::INST_EXTRACTELT:
{
	unsigned index = 0;
	auto vec = get_value_and_type(entry.ops, index);
	if (!vec.first || !isa<VectorType>(vec.second))
		return false;
	auto element_index = get_value_and_type(entry.ops, index);
	if (!element_index.first)
		return false;
	auto *value = context->construct<ExtractElementInst>(vec.first, element_index.first);
	if (!add_instruction(value))
		return false;
	break;
}

case FunctionRecord::INST_INSERTELT:
{
	unsigned index = 0;
	auto vec = get_value_and_type(entry.ops, index);
	if (!vec.first || !isa<VectorType>(vec.second))
		return false;
	auto *value = get_value(entry.ops, index, cast<VectorType>(vec.second)->getElementType());
	auto element_index = get_value_and_type(entry.ops, index);
	if (!value || !element_index.first)
		return false;
	auto *new_value = context->construct<InsertElementInst>(vec.first, value, element_index.first);
	if (!add_instruction(new_value))
		return false;
	break;
}

default:
	LOGE("Unhandled instruction!\n");
	return false;
}

return true;
}

bool ModuleParseContext::resolve_forward_references()
{
	for (auto *ref : pending_forward_references)
		if (!ref->resolve())
			return false;
	pending_forward_references.clear();

	for (auto *bb : basic_blocks)
		for (auto &inst : *bb)
			if (!inst.resolve_proxy_values())
				return false;

	return true;
}

bool ModuleParseContext::resolve_global_initializations()
{
	for (auto &ref : global_initializations)
	{
		Value *value = get_value(ref.second, nullptr, true);
		if (!value)
			return false;
		auto *constant_value = dyn_cast<Constant>(value);
		if (!constant_value)
		{
			LOGE("Global initializer is not a constant!\n");
			return false;
		}
		ref.first->set_initializer(constant_value);
	}
	global_initializations.clear();
	return true;
}
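// Forward references exist because instruction operands may refer to values
// that are only defined later in the function. They are recorded during
// parsing and patched here, once the whole body has been read.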
bool ModuleParseContext::parse_function_body(const BlockOrRecord &entry)
{
	auto global_values = values;

	// I think we are supposed to process functions in the same order as the module declared them?
	if (!seen_first_function_body)
	{
		std::reverse(functions_with_bodies.begin(), functions_with_bodies.end());
		seen_first_function_body = true;
	}

	if (functions_with_bodies.empty())
	{
		LOGE("No more functions to process?\n");
		return false;
	}

	function = functions_with_bodies.back();
	functions_with_bodies.pop_back();

	auto *func_type = function->getFunctionType();
	for (unsigned i = 0; i < func_type->getNumParams(); i++)
	{
		auto *param_type = func_type->getParamType(i);
		auto *arg = context->construct<Argument>(param_type, i);
		function->add_argument(arg);
		add_value(arg);
	}

	for (auto &child : entry.children)
	{
		if (child.IsBlock())
		{
			if (!parse_function_child_block(child))
				return false;
		}
		else
		{
			if (!parse_record(child))
				return false;
		}
	}

	if (!resolve_forward_references())
		return false;
	if (!resolve_global_initializations())
		return false;

	function->set_basic_blocks(std::move(basic_blocks));
	basic_blocks = {};
	basic_block_index = 0;
	module->add_function_implementation(function);

	// Restore the module-level value context.
	values = global_values;
	instructions.clear();
	return true;
}

bool ModuleParseContext::parse_type(const BlockOrRecord &child)
{
	Type *type = nullptr;
	switch (TypeRecord(child.id))
	{
	case TypeRecord::NUMENTRY:
	case TypeRecord::STRUCT_NAME:
		return true;

	case TypeRecord::VOID_TYPE:
		type = Type::getVoidTy(*context);
		break;
	case TypeRecord::HALF:
		type = Type::getHalfTy(*context);
		break;
	case TypeRecord::FLOAT:
		type = Type::getFloatTy(*context);
		break;
	case TypeRecord::DOUBLE:
		type = Type::getDoubleTy(*context);
		break;

	case TypeRecord::POINTER:
	{
		if (child.ops.size() < 2)
			return false;
		auto *pointee_type = get_type(child.ops[0]);
		if (!pointee_type)
			return false;
		type = PointerType::get(pointee_type, child.ops[1]);
		break;
	}

	case TypeRecord::ARRAY:
	{
		if (child.ops.size() < 2)
			return false;
		auto *elem_type = get_type(child.ops[1]);
		if (!elem_type)
			return false;
		type = ArrayType::get(elem_type, child.ops[0]);
		break;
	}

	case TypeRecord::INTEGER:
	{
		if (child.ops.size() < 1)
			return false;
		auto bit_width = child.ops[0];
		if (bit_width <= 64)
			type = Type::getIntTy(*context, unsigned(bit_width));
		else
			return false;
		break;
	}

	case TypeRecord::STRUCT_NAMED:
	case TypeRecord::STRUCT_ANON:
	{
		if (child.ops.size() < 1)
			return false;
		Vector<Type *> members;
		unsigned num_members = unsigned(child.ops.size()) - 1;
		members.reserve(num_members);
		for (unsigned i = 0; i < num_members; i++)
			members.push_back(get_type(child.ops[i + 1]));
		type = StructType::get(*context, std::move(members));
		break;
	}

	case TypeRecord::VECTOR:
	{
		if (child.ops.size() < 2)
			return false;
		auto *elem_type = get_type(child.ops[1]);
		if (!elem_type)
			return false;
		type = VectorType::get(child.ops[0], elem_type);
		break;
	}

	case TypeRecord::FUNCTION:
	{
		if (child.ops.size() < 2)
			return false;
		Vector<Type *> argument_types;
		argument_types.reserve(child.ops.size() - 2);
		for (size_t i = 2; i < child.ops.size(); i++)
			argument_types.push_back(get_type(child.ops[i]));
		auto *return_type = get_type(child.ops[1]);
		if (!return_type)
			return false;
		type = context->construct<FunctionType>(*context, return_type, std::move(argument_types));
		break;
	}

	case TypeRecord::LABEL:
		type = Type::getLabelTy(*context);
		break;
	case TypeRecord::METADATA:
		type = Type::getMetadataTy(*context);
		break;
	case TypeRecord::OPAQUE_TYPE:
		type = Type::getOpaqueTy(*context);
		break;

	default:
		LOGE("Unknown type!\n");
		return false;
	}

	add_type(type);
	return true;
}

bool ModuleParseContext::parse_types(const BlockOrRecord &entry)
{
	for (auto &child : entry.children)
		if (!parse_type(child))
			return false;
	return true;
}
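// Types are appended to the type table in record order; get_type() later
// resolves plain indices into this table, so that ordering must be preserved.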
bool ModuleParseContext::parse_value_symtab(const BlockOrRecord &entry)
{
	for (auto &symtab : entry.children)
	{
		switch (ValueSymtabRecord(symtab.id))
		{
		case ValueSymtabRecord::ENTRY:
		{
			if (symtab.ops.size() < 1)
				return false;
			auto name = symtab.getString(1);
			module->add_value_name(symtab.ops[0], name);
			break;
		}

		default:
			break;
		}
	}
	return true;
}

static GlobalVariable::LinkageTypes decode_linkage(uint64_t v)
{
	switch (v)
	{
	case 0:
	case 5:
	case 6:
	case 15:
		return GlobalVariable::ExternalLinkage;
	case 2:
		return GlobalVariable::AppendingLinkage;
	default:
		return GlobalVariable::InternalLinkage;
	}
}

bool ModuleParseContext::parse_global_variable_record(const BlockOrRecord &entry)
{
	if (use_strtab)
	{
		LOGE("Unknown module code 2 which uses strtab.\n");
		return false;
	}

	if (entry.ops.size() < 4)
		return false;

	auto *type = get_type(entry.ops[0]);
	bool is_const = (entry.ops[1] & 1) != 0;
	bool explicit_type = (entry.ops[1] & 2) != 0;
	unsigned address_space = 0;
	if (explicit_type)
	{
		address_space = entry.ops[1] >> 2;
	}
	else
	{
		address_space = cast<PointerType>(type)->getAddressSpace();
		type = cast<PointerType>(type)->getElementType();
	}

	if (!type)
		return false;

	auto linkage = decode_linkage(entry.ops[3]);
	auto *value = context->construct<GlobalVariable>(PointerType::get(type, address_space), linkage, is_const);
	module->add_global_variable(value);
	add_value(value);

	uint64_t init_id = entry.ops[2];
	if (init_id != 0)
		global_initializations.push_back({ value, init_id - 1 });

	return true;
}

bool ModuleParseContext::parse_function_record(const BlockOrRecord &entry)
{
	if (use_strtab)
	{
		LOGE("Unknown module code 2 which uses strtab.\n");
		return false;
	}

	if (entry.ops.size() < 3)
		return false;

	auto *type = get_type(entry.ops[0]);
	if (!type)
		return false;

	// Calling convention is [1], not relevant.
	bool is_proto = entry.ops[2];
	// Lots of other irrelevant arguments ...

	auto *func_type = dyn_cast<FunctionType>(type);
	if (!func_type)
		func_type = cast<FunctionType>(cast<PointerType>(type)->getElementType());
	if (!func_type)
		return false;

	auto id = values.size();
	auto *func = context->construct<Function>(func_type, id, *module);

	if (entry.ops.size() >= 5 && entry.ops[4] != 0 && (entry.ops[4] - 1) < attribute_lists.size())
		func->set_attributes(attribute_lists[entry.ops[4] - 1]);

	values.push_back(func);
	if (!is_proto)
		functions_with_bodies.push_back(func);

	return true;
}

bool ModuleParseContext::parse_version_record(const BlockOrRecord &entry)
{
	if (entry.ops.size() < 1)
		return false;
	unsigned version = entry.ops[0];
	use_relative_id = version >= 1;
	use_strtab = version >= 2;
	return true;
}
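// Bitcode version 1 switched operand references from absolute to relative
// value IDs, and version 2 moved symbol names into a string table; both flags
// change how later records must be decoded.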
Type *ModuleParseContext::get_type(uint64_t index)
{
	if (index >= types.size())
		return nullptr;
	return types[index];
}

bool ModuleParseContext::add_type(Type *type)
{
	types.push_back(type);
	return true;
}

void Module::add_value_name(uint64_t id, const String &name)
{
	value_symtab[id] = name;
}

void Module::add_function_implementation(Function *func)
{
	functions.push_back(func);
}

void Module::add_global_variable(GlobalVariable *variable)
{
	globals.push_back(variable);
}

void Module::add_named_metadata(const String &name, NamedMDNode *node)
{
	named_metadata[name] = node;
}

void Module::add_unnamed_metadata(MDNode *node)
{
	unnamed_metadata.push_back(node);
}

Function *Module::getFunction(const String &name) const
{
	auto itr = std::find_if(functions.begin(), functions.end(),
	                        [&](const Function *func) { return func->getName() == name; });
	if (itr != functions.end())
		return *itr;
	else
		return nullptr;
}

NamedMDNode *Module::getNamedMetadata(const String &name) const
{
	auto itr = named_metadata.find(name);
	if (itr != named_metadata.end())
		return itr->second;
	else
		return nullptr;
}

static const String empty_string;
const String &Module::get_value_name(uint64_t id) const
{
	auto itr = value_symtab.find(id);
	if (itr != value_symtab.end())
		return itr->second;
	else
		return empty_string;
}

LLVMContext &Module::getContext()
{
	return context;
}

Module::Module(LLVMContext &context_)
    : context(context_)
{
}

Vector<Function *>::const_iterator Module::begin() const
{
	return functions.begin();
}

Vector<Function *>::const_iterator Module::end() const
{
	return functions.end();
}

IteratorAdaptor<GlobalVariable, Vector<GlobalVariable *>::const_iterator> Module::global_begin() const
{
	return globals.begin();
}

IteratorAdaptor<GlobalVariable, Vector<GlobalVariable *>::const_iterator> Module::global_end() const
{
	return globals.end();
}

UnorderedMap<String, NamedMDNode *>::const_iterator Module::named_metadata_begin() const
{
	return named_metadata.begin();
}

UnorderedMap<String, NamedMDNode *>::const_iterator Module::named_metadata_end() const
{
	return named_metadata.end();
}

Vector<MDNode *>::const_iterator Module::unnamed_metadata_begin() const
{
	return unnamed_metadata.begin();
}

Vector<MDNode *>::const_iterator Module::unnamed_metadata_end() const
{
	return unnamed_metadata.end();
}

Module *parseIR(LLVMContext &context, const void *data, size_t size)
{
	LLVMBC::BitcodeReader reader(static_cast<const uint8_t *>(data), size);
	auto toplevel = reader.ReadToplevelBlock();

	// The top-level block must be MODULE_BLOCK.
	if (KnownBlocks(toplevel.id) != KnownBlocks::MODULE_BLOCK)
		return nullptr;

	// We should have consumed all bits, only one top-level block.
	if (!reader.AtEndOfStream())
		return nullptr;

	auto *module = context.construct<Module>(context);
	ModuleParseContext parse_context;
	parse_context.module = module;
	parse_context.context = &module->getContext();

	for (auto &child : toplevel.children)
	{
		if (child.IsBlock())
		{
			switch (KnownBlocks(child.id))
			{
			case KnownBlocks::VALUE_SYMTAB_BLOCK:
				if (!parse_context.parse_value_symtab(child))
					return nullptr;
				break;

			case KnownBlocks::FUNCTION_BLOCK:
				if (!parse_context.parse_function_body(child))
					return nullptr;
				break;

			case KnownBlocks::TYPE_BLOCK:
				if (!parse_context.parse_types(child))
					return nullptr;
				break;

			case KnownBlocks::CONSTANTS_BLOCK:
				if (!parse_context.parse_constants_block(child))
					return nullptr;
				break;

			case KnownBlocks::METADATA_BLOCK:
				if (!parse_context.parse_metadata_block(child))
					return nullptr;
				break;

			case KnownBlocks::PARAMATTR_BLOCK:
				if (!parse_context.parse_paramattr_block(child))
					return nullptr;
				break;

			case KnownBlocks::PARAMATTR_GROUP_BLOCK:
				if (!parse_context.parse_paramattr_group_block(child))
					return nullptr;
				break;

			default:
				break;
			}
		}
		else
		{
			switch (ModuleRecord(child.id))
			{
			case ModuleRecord::VERSION:
				if (!parse_context.parse_version_record(child))
					return nullptr;
				break;

			case ModuleRecord::FUNCTION:
				if (!parse_context.parse_function_record(child))
					return nullptr;
				break;

			case ModuleRecord::GLOBAL_VARIABLE:
				if (!parse_context.parse_global_variable_record(child))
					return nullptr;
				break;

			default:
				break;
			}
		}
	}

	return module;
}
} // namespace LLVMBC

================================================
FILE: bc/module.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

namespace dxbc_spv
{
namespace ir
{
class Builder;
}
}

#include "data_structures.hpp"
#include "iterator.hpp"
#include <stddef.h>
#include <stdint.h>

// A reasonably small LLVM C++ API lookalike.
#define llvm LLVMBC

namespace LLVMBC
{
class Function;
class LLVMContext;
class Type;
class Instruction;
class BasicBlock;
class GlobalVariable;
class NamedMDNode;
class MDNode;

class Module
{
public:
	explicit Module(LLVMContext &context);
	LLVMContext &getContext();

	NamedMDNode *getNamedMetadata(const String &name) const;
	Function *getFunction(const String &name) const;

	void add_value_name(uint64_t id, const String &name);
	void add_function_implementation(Function *func);
	void add_global_variable(GlobalVariable *variable);
	void add_named_metadata(const String &name, NamedMDNode *node);
	void add_unnamed_metadata(MDNode *node);
	const String &get_value_name(uint64_t id) const;

	Vector<Function *>::const_iterator begin() const;
	Vector<Function *>::const_iterator end() const;
	IteratorAdaptor<GlobalVariable, Vector<GlobalVariable *>::const_iterator> global_begin() const;
	IteratorAdaptor<GlobalVariable, Vector<GlobalVariable *>::const_iterator> global_end() const;
	UnorderedMap<String, NamedMDNode *>::const_iterator named_metadata_begin() const;
	UnorderedMap<String, NamedMDNode *>::const_iterator named_metadata_end() const;
	Vector<MDNode *>::const_iterator unnamed_metadata_begin() const;
	Vector<MDNode *>::const_iterator unnamed_metadata_end() const;

private:
	LLVMContext &context;
	Vector<Function *> functions;
	Vector<GlobalVariable *> globals;
	UnorderedMap<uint64_t, String> value_symtab;
	UnorderedMap<String, NamedMDNode *> named_metadata;
	Vector<MDNode *> unnamed_metadata;
};

Module *parseIR(LLVMContext &context, const void *data, size_t size);
Module *parseDXBCIR(LLVMContext &context, dxbc_spv::ir::Builder &builder);
Module *parseDXBCBinary(LLVMContext &context, const void *data, size_t size);
bool disassemble(Module &module, String &str);
} // namespace LLVMBC
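// Illustrative usage of the lookalike API (not part of the header), assuming
// `data`/`size` point at a raw LLVM bitcode payload and do_something_with()
// is a placeholder:
//
//   LLVMBC::LLVMContext context;
//   LLVMBC::Module *module = LLVMBC::parseIR(context, data, size);
//   if (module) // parseIR returns nullptr on malformed input.
//       for (const LLVMBC::Function *func : *module)
//           do_something_with(func->getName());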
================================================
FILE: bc/module_dxbc_ir.cpp
================================================
/* Copyright (c) 2025 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "module.hpp"
#include "context.hpp"
#include "metadata.hpp"
#include "cast.hpp"
#include "function.hpp"
#include "instruction.hpp"
#include <array>
#include "../dxil.hpp"
#include <string.h>

// dxbc-spirv
#include "ir/ir.h"
#include "ir/ir_builder.h"
#include "dxbc/dxbc_api.h"
#include "util/util_log.h"

using namespace dxbc_spv;

class ScopedLogger : util::Logger
{
public:
	virtual void message(util::LogLevel severity, const char *text)
	{
		switch (severity)
		{
		case util::LogLevel::eDebug:
		case util::LogLevel::eInfo:
			LOGI("%s\n", text);
			break;
		case util::LogLevel::eWarn:
			LOGW("%s\n", text);
			break;
		case util::LogLevel::eError:
			LOGE("%s\n", text);
			break;
		}
	}

	virtual util::LogLevel getMinimumSeverity()
	{
		return util::LogLevel::eInfo;
	}
};

namespace LLVMBC
{
template <typename Func>
static void for_all_opcodes(ir::Builder &builder, ir::OpCode opcode, const Func &func)
{
	for (auto &op : builder)
		if (op.getOpCode() == opcode)
			if (!func(op))
				return;
}

static const char *shader_stage_to_meta(ir::ShaderStage stage)
{
	switch (stage)
	{
	case ir::ShaderStage::eVertex: return "vs";
	case ir::ShaderStage::eHull: return "hs";
	case ir::ShaderStage::eDomain: return "ds";
	case ir::ShaderStage::eGeometry: return "gs";
	case ir::ShaderStage::ePixel: return "ps";
	case ir::ShaderStage::eCompute: return "cs";
	default: return "";
	}
}

static DXIL::InputPrimitive convert_input_primitive_type(ir::PrimitiveType type)
{
	switch (type)
	{
	case ir::PrimitiveType::eLines: return DXIL::InputPrimitive::Line;
	case ir::PrimitiveType::eLinesAdj: return DXIL::InputPrimitive::LineWithAdjacency;
	case ir::PrimitiveType::ePoints: return DXIL::InputPrimitive::Point;
	case ir::PrimitiveType::eTriangles: return DXIL::InputPrimitive::Triangle;
	case ir::PrimitiveType::eTrianglesAdj: return DXIL::InputPrimitive::TriangleWithAdjaceny;
	default: return DXIL::InputPrimitive::Undefined;
	}
}

static DXIL::PrimitiveTopology convert_output_primitive_type(ir::PrimitiveType type)
{
	switch (type)
	{
	case ir::PrimitiveType::eLines: return DXIL::PrimitiveTopology::LineStrip;
	case ir::PrimitiveType::ePoints: return DXIL::PrimitiveTopology::PointList;
	case ir::PrimitiveType::eTriangles: return DXIL::PrimitiveTopology::TriangleStrip;
	default: return DXIL::PrimitiveTopology::Undefined;
	}
}

static DXIL::ResourceKind convert_resource_kind(ir::ResourceKind kind)
{
	switch (kind)
	{
	case ir::ResourceKind::eBufferRaw: return DXIL::ResourceKind::RawBuffer;
	case ir::ResourceKind::eBufferStructured: return DXIL::ResourceKind::StructuredBuffer;
	case ir::ResourceKind::eBufferTyped: return DXIL::ResourceKind::TypedBuffer;
	case ir::ResourceKind::eImage1D: return DXIL::ResourceKind::Texture1D;
	case ir::ResourceKind::eImage1DArray: return DXIL::ResourceKind::Texture1DArray;
	case ir::ResourceKind::eImage2D: return DXIL::ResourceKind::Texture2D;
	case ir::ResourceKind::eImage2DArray: return DXIL::ResourceKind::Texture2DArray;
	case ir::ResourceKind::eImage3D: return DXIL::ResourceKind::Texture3D;
	case ir::ResourceKind::eImageCube: return DXIL::ResourceKind::TextureCube;
	case ir::ResourceKind::eImageCubeArray: return DXIL::ResourceKind::TextureCubeArray;
	case ir::ResourceKind::eImage2DMS: return DXIL::ResourceKind::Texture2DMS;
	case ir::ResourceKind::eImage2DMSArray: return DXIL::ResourceKind::Texture2DMSArray;
	default:
		LOGE("Unrecognized resource kind %d\n", int(kind));
		return DXIL::ResourceKind::Invalid;
	}
}
kind %d\n", int(kind)); return DXIL::ResourceKind::Invalid; } } static DXIL::Op convert_builtin_opcode(ir::BuiltIn builtin) { switch (builtin) { case ir::BuiltIn::eSampleCount: return DXIL::Op::RenderTargetGetSampleCount; case ir::BuiltIn::eLocalThreadIndex: return DXIL::Op::FlattenedThreadIdInGroup; case ir::BuiltIn::eIsFullyCovered: return DXIL::Op::InnerCoverage; case ir::BuiltIn::eGsInstanceId: return DXIL::Op::GSInstanceID; case ir::BuiltIn::ePrimitiveId: return DXIL::Op::PrimitiveID; case ir::BuiltIn::eTessControlPointId: return DXIL::Op::OutputControlPointID; case ir::BuiltIn::eTessControlPointCountIn: return DXIL::Op::ExtendedSpirvControlPointCountIn; default: return DXIL::Op::Count; } } static DXIL::Semantic convert_semantic(ir::BuiltIn builtin) { switch (builtin) { case ir::BuiltIn::ePosition: return DXIL::Semantic::Position; case ir::BuiltIn::eClipDistance: return DXIL::Semantic::ClipDistance; case ir::BuiltIn::eCullDistance: return DXIL::Semantic::CullDistance; case ir::BuiltIn::eVertexId: return DXIL::Semantic::VertexID; case ir::BuiltIn::eInstanceId: return DXIL::Semantic::InstanceID; case ir::BuiltIn::ePrimitiveId: return DXIL::Semantic::PrimitiveID; case ir::BuiltIn::eLayerIndex: return DXIL::Semantic::RenderTargetArrayIndex; case ir::BuiltIn::eViewportIndex: return DXIL::Semantic::ViewPortArrayIndex; case ir::BuiltIn::eGsInstanceId: return DXIL::Semantic::GSInstanceID; case ir::BuiltIn::eTessControlPointId: return DXIL::Semantic::OutputControlPointID; case ir::BuiltIn::eTessCoord: return DXIL::Semantic::DomainLocation; case ir::BuiltIn::eTessFactorInner: return DXIL::Semantic::InsideTessFactor; case ir::BuiltIn::eTessFactorOuter: return DXIL::Semantic::TessFactor; case ir::BuiltIn::eSampleId: return DXIL::Semantic::SampleIndex; case ir::BuiltIn::eSampleMask: return DXIL::Semantic::Coverage; case ir::BuiltIn::eIsFrontFace: return DXIL::Semantic::IsFrontFace; case ir::BuiltIn::eDepth: return DXIL::Semantic::Depth; case ir::BuiltIn::eStencilRef: return DXIL::Semantic::StencilRef; case ir::BuiltIn::eGlobalThreadId: return DXIL::Semantic::DispatchThreadID; case ir::BuiltIn::eLocalThreadId: return DXIL::Semantic::GroupThreadID; case ir::BuiltIn::eWorkgroupId: return DXIL::Semantic::GroupID; default: return DXIL::Semantic::User; } } static DXIL::InterpolationMode convert_interpolation_mode(ir::InterpolationMode mode) { switch (mode) { case ir::InterpolationMode::eCentroid: return DXIL::InterpolationMode::LinearCentroid; case ir::InterpolationMode::eNoPerspective: return DXIL::InterpolationMode::LinearNoperspective; case ir::InterpolationMode::eFlat: return DXIL::InterpolationMode::Constant; case ir::InterpolationMode::eSample: return DXIL::InterpolationMode::LinearSample; default: return DXIL::InterpolationMode::Undefined; } } static DXIL::AtomicBinOp convert_atomic_binop(ir::AtomicOp binop) { switch (binop) { case ir::AtomicOp::eAdd: case ir::AtomicOp::eInc: return DXIL::AtomicBinOp::IAdd; case ir::AtomicOp::eAnd: return DXIL::AtomicBinOp::And; case ir::AtomicOp::eOr: return DXIL::AtomicBinOp::Or; case ir::AtomicOp::eXor: return DXIL::AtomicBinOp::Xor; case ir::AtomicOp::eExchange: return DXIL::AtomicBinOp::Exchange; case ir::AtomicOp::eSMax: return DXIL::AtomicBinOp::IMax; case ir::AtomicOp::eSMin: return DXIL::AtomicBinOp::IMin; case ir::AtomicOp::eUMax: return DXIL::AtomicBinOp::UMax; case ir::AtomicOp::eUMin: return DXIL::AtomicBinOp::UMin; case ir::AtomicOp::eSub: case ir::AtomicOp::eDec: return DXIL::AtomicBinOp::Sub; case ir::AtomicOp::eLoad: return 
static AtomicRMWInst::BinOp convert_atomic_binop_llvm(ir::AtomicOp binop)
{
	switch (binop)
	{
	case ir::AtomicOp::eAdd:
	case ir::AtomicOp::eInc: return AtomicRMWInst::BinOp::Add;
	case ir::AtomicOp::eAnd: return AtomicRMWInst::BinOp::And;
	case ir::AtomicOp::eOr: return AtomicRMWInst::BinOp::Or;
	case ir::AtomicOp::eXor: return AtomicRMWInst::BinOp::Xor;
	case ir::AtomicOp::eExchange: return AtomicRMWInst::BinOp::Xchg;
	case ir::AtomicOp::eSMax: return AtomicRMWInst::BinOp::Max;
	case ir::AtomicOp::eSMin: return AtomicRMWInst::BinOp::Min;
	case ir::AtomicOp::eUMax: return AtomicRMWInst::BinOp::UMax;
	case ir::AtomicOp::eUMin: return AtomicRMWInst::BinOp::UMin;
	case ir::AtomicOp::eSub:
	case ir::AtomicOp::eDec: return AtomicRMWInst::BinOp::Sub;
	case ir::AtomicOp::eLoad: return AtomicRMWInst::BinOp::Or;
	case ir::AtomicOp::eStore: return AtomicRMWInst::BinOp::Xchg;
	default: return AtomicRMWInst::BinOp::Invalid;
	}
}

static DXIL::Op convert_round_mode(ir::RoundMode mode)
{
	switch (mode)
	{
	case ir::RoundMode::ePositiveInf: return DXIL::Op::Round_pi;
	case ir::RoundMode::eNegativeInf: return DXIL::Op::Round_ni;
	case ir::RoundMode::eZero: return DXIL::Op::Round_z;
	default: return DXIL::Op::Round_ne;
	}
}

struct ComponentMapping
{
	DXIL::ComponentType type = DXIL::ComponentType::Invalid;
	uint32_t num_rows = 1;
	uint32_t num_cols = 1;
};

static ComponentMapping convert_component_mapping(const ir::Type &type, bool need_axis)
{
	ComponentMapping mapping = {};

	switch (type.getBaseType(0).getBaseType())
	{
	case ir::ScalarType::eF16: mapping.type = DXIL::ComponentType::F16; break;
	case ir::ScalarType::eI16: mapping.type = DXIL::ComponentType::I16; break;
	case ir::ScalarType::eU16: mapping.type = DXIL::ComponentType::U16; break;
	case ir::ScalarType::eF32: mapping.type = DXIL::ComponentType::F32; break;
	case ir::ScalarType::eI32: mapping.type = DXIL::ComponentType::I32; break;
	case ir::ScalarType::eU32: mapping.type = DXIL::ComponentType::U32; break;
	case ir::ScalarType::eF64: mapping.type = DXIL::ComponentType::F64; break;
	case ir::ScalarType::eI64: mapping.type = DXIL::ComponentType::I64; break;
	case ir::ScalarType::eU64: mapping.type = DXIL::ComponentType::U64; break;
	case ir::ScalarType::eBool: mapping.type = DXIL::ComponentType::I1; break;
	default:
		LOGE("Unrecognized component type.\n");
		break;
	}

	if (need_axis)
	{
		// Strip the outermost dimension.
		if (type.getArrayDimensions() >= 2)
			mapping.num_rows = type.getArraySize(type.getArrayDimensions() - 2);
	}
	else if (type.isArrayType())
	{
		mapping.num_rows = type.getArraySize(0);
		if (type.getArrayDimensions() != 1)
			LOGE("Unexpected number of array dimensions.\n");
	}

	mapping.num_cols = type.getBaseType(0).getVectorSize();
	return mapping;
}
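// Example of the mapping above: an ir::Type of f32x4[3] (one array dimension)
// yields num_rows = 3, num_cols = 4 with component type F32. With need_axis
// set, the outermost array dimension is the control-point axis and is
// stripped rather than counted as rows.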
static DXIL::TessellatorDomain convert_hull_domain(ir::PrimitiveType type)
{
	switch (type)
	{
	case ir::PrimitiveType::eTriangles: return DXIL::TessellatorDomain::Tri;
	case ir::PrimitiveType::eQuads: return DXIL::TessellatorDomain::Quad;
	case ir::PrimitiveType::eLines: return DXIL::TessellatorDomain::IsoLine;
	default: return DXIL::TessellatorDomain::Undefined;
	}
}

static DXIL::TessellatorPartitioning convert_hull_partitioning(ir::TessPartitioning part)
{
	switch (part)
	{
	case ir::TessPartitioning::eInteger: return DXIL::TessellatorPartitioning::Integer;
	case ir::TessPartitioning::eFractEven: return DXIL::TessellatorPartitioning::FractionalEven;
	case ir::TessPartitioning::eFractOdd: return DXIL::TessellatorPartitioning::FractionalOdd;
	default: return DXIL::TessellatorPartitioning::Undefined;
	}
}

static DXIL::TessellatorOutputPrimitive convert_hull_output_primitive(ir::PrimitiveType type, ir::TessWindingOrder winding)
{
	switch (type)
	{
	case ir::PrimitiveType::eTriangles:
		return winding == ir::TessWindingOrder::eCw ?
		       DXIL::TessellatorOutputPrimitive::TriangleCW :
		       DXIL::TessellatorOutputPrimitive::TriangleCCW;
	case ir::PrimitiveType::eLines: return DXIL::TessellatorOutputPrimitive::Line;
	case ir::PrimitiveType::ePoints: return DXIL::TessellatorOutputPrimitive::Point;
	default: return DXIL::TessellatorOutputPrimitive::Undefined;
	}
}

struct DXILIntrinsicTable
{
	struct FunctionOverload
	{
		Function *func;
		// Either overloaded on return type, or the primary argument for e.g. stores.
		Type *overload_type;
	};

	struct FunctionEntry
	{
		// At most, this should be overloaded for i32/u32/f32 x 16/32/64.
		std::array<FunctionOverload, 9> overloads;
		unsigned num_overloads;
	};

	FunctionEntry intrinsic_functions[int(DXIL::Op::Count)] = {};

	Function *get(Module &module, DXIL::Op op, Type *return_type,
	              const Vector<Type *> &argument_types, Type *overload_type, uint64_t &tween);
};

Function *DXILIntrinsicTable::get(Module &module, DXIL::Op op, Type *return_type,
                                  const Vector<Type *> &argument_types, Type *overload_type, uint64_t &tween)
{
	auto &entry = intrinsic_functions[int(op)];
	for (unsigned i = 0; i < entry.num_overloads; i++)
		if (entry.overloads[i].overload_type == overload_type)
			return entry.overloads[i].func;

	auto &context = module.getContext();
	assert(entry.num_overloads < entry.overloads.size());
	auto *func_type = context.construct<FunctionType>(context, return_type, argument_types);
	auto *func = context.construct<Function>(func_type, ++tween, module);
	// TODO: Can have a look-up for expected intrinsic names.
	module.add_value_name(tween, "dx.op.intrinsic");
	entry.overloads[entry.num_overloads++] = { func, overload_type };
	return func;
}
class ParseContext
{
public:
	ParseContext(LLVMContext &context_, ir::Builder &builder_, Module &module_)
	    : context(context_), builder(builder_), module(module_)
	{
	}

	bool emit_metadata();
	bool emit_entry_point();
	bool emit_function_bodies();

private:
	LLVMContext &context;
	ir::Builder &builder;
	Module &module;
	uint64_t metadata_tween_id = 0;
	uint64_t tween_id = 0;
	ir::ShaderStage shader_stage = {};

	ConstantInt *get_constant_uint(uint32_t value);

	// Metadata wrangling
	ConstantAsMetadata *create_constant_uint_meta(uint32_t value);
	ConstantAsMetadata *create_constant_uint64_meta(uint32_t value);
	MDString *create_string_meta(const String &str);
	ConstantAsMetadata *create_constant_meta(Constant *c);

	template <typename... Ops>
	MDNode *create_md_node(Ops &&... ops)
	{
		Vector<MDOperand *> vops { std::forward<Ops>(ops)... };
		return create_md_node(std::move(vops));
	}

	void create_named_md_node(const String &name, MDNode *node);
	MDNode *create_md_node(Vector<MDOperand *> ops);
	MDOperand *create_entry_point_meta(llvm::Function *patch_control_func);
	MDNode *create_stage_io_meta();
	MDOperand *create_null_meta();
	void set_function_attributes(Function *func);

	struct MetadataMapping
	{
		Vector<MDNode *> nodes;
	};
	MetadataMapping srvs, uavs, cbvs, samplers, inputs, outputs, patches;

	uint32_t build_texture_srv(uint32_t space, uint32_t index, uint32_t size,
	                           DXIL::ResourceKind kind, DXIL::ComponentType type);
	uint32_t build_texture_uav(uint32_t space, uint32_t index, uint32_t size,
	                           DXIL::ResourceKind kind, DXIL::ComponentType type, bool coherent, bool rov);
	uint32_t build_buffer_uav(uint32_t space, uint32_t index, uint32_t size,
	                          DXIL::ResourceKind kind, uint32_t stride, bool coherent, bool counter, bool rov);
	uint32_t build_buffer_srv(uint32_t space, uint32_t index, uint32_t size,
	                          DXIL::ResourceKind kind, uint32_t stride);
	uint32_t build_cbv(uint32_t space, uint32_t index, uint32_t size, uint32_t cbv_size);
	uint32_t build_sampler(uint32_t space, uint32_t index, uint32_t size);
	uint32_t build_stage_io(MetadataMapping &mapping, ir::SsaDef ssa, const String &name,
	                        DXIL::ComponentType type, DXIL::Semantic semantic, uint32_t semantic_index,
	                        DXIL::InterpolationMode interpolation, uint32_t rows, uint32_t cols,
	                        uint32_t start_row, uint32_t start_col, uint32_t stream, bool need_axis);

	// DXIL intrinsic build.
	DXILIntrinsicTable dxil_intrinsics;
	template <typename... Values>
	Instruction *build_dxil_call(DXIL::Op op, Type *return_type, Type *overload_type, Values &&... values);
	Instruction *build_dxil_call(DXIL::Op op, Type *return_type, Type *overload_type, Vector<Value *> values);

	// Resource access hell.
	Instruction *build_load_input(uint32_t index, Type *type, Value *row, uint32_t col, Value *axis, bool patch);
	Instruction *build_load_output(uint32_t index, Type *type, Value *row, uint32_t col, Value *axis, bool patch);
	Instruction *build_store_output(uint32_t index, Value *row, uint32_t col, Value *value, bool patch);
	Instruction *build_load_builtin(DXIL::Op opcode, ir::SsaDef addr);
	Instruction *build_descriptor_load(ir::SsaDef resource, ir::SsaDef index, bool nonuniform);

	bool build_input_load(const ir::Op &op);
	bool build_output_load(const ir::Op &op);
	bool build_output_store(const ir::Op &op);
	bool build_gep_load(const ir::Op &op);
	bool build_gep_store(const ir::Op &op);
	bool build_composite_construct(const ir::Op &op);
	bool build_composite_extract(const ir::Op &op);
	bool build_composite_insert(const ir::Op &op);
	bool build_descriptor_load(const ir::Op &op);
	bool build_buffer_load(const ir::Op &op);
	bool build_buffer_load_cbv(const ir::Op &op);
	bool build_buffer_load(const ir::Op &op, DXIL::ResourceKind kind);
	bool build_buffer_load_return_composite(const ir::Op &op, Value *value);
	Instruction *build_extract_composite(const ir::Op &op, Value *value, unsigned num_elements);
	bool build_buffer_query_size(const ir::Op &op);
	bool build_buffer_store(const ir::Op &op);
	bool build_buffer_store(const ir::Op &op, DXIL::ResourceKind kind);
	bool build_buffer_atomic(const ir::Op &op);
	bool build_lds_atomic(const ir::Op &op);
	bool build_buffer_atomic_binop(const ir::Op &op, DXIL::ResourceKind kind);
	bool build_counter_atomic(const ir::Op &op);
	bool build_image_load(const ir::Op &op);
	bool build_image_store(const ir::Op &op);
	bool build_image_atomic(const ir::Op &op);
	bool build_image_query_size(const ir::Op &op);
	bool build_image_query_mips_samples(const ir::Op &op);
	bool build_image_sample(const ir::Op &op);
	bool build_image_gather(const ir::Op &op);
	bool build_image_compute_lod(const ir::Op &op);
	bool build_deriv(const ir::Op &op);
	bool build_check_sparse_access(const ir::Op &op);
	bool build_fround(const ir::Op &op);
	bool build_frcp(const ir::Op &op);
	bool build_binary_op(const ir::Op &op, BinaryOperator::BinaryOps binop);
	bool build_interpolate_at_centroid(const ir::Op &op);
	bool build_interpolate_at_sample(const ir::Op &op);
	bool build_interpolate_at_offset(const ir::Op &op);
	bool build_barrier(const ir::Op &op);
	bool build_demote(const ir::Op &op);

	template <DXIL::Op dxop> bool build_dxil_unary(const ir::Op &op);
	template <DXIL::Op dxop> bool build_dxil_constant_unary(const ir::Op &op);
	template <DXIL::Op dxop> bool build_dxil_binary(const ir::Op &op);
	template <DXIL::Op dxop> bool build_dxil_trinary(const ir::Op &op);
	template <DXIL::Op dxop> bool build_dxil_quaternary(const ir::Op &op);

	Value *get_extracted_composite_component(Value *value, unsigned component);
	Value *get_constant_mul(Value *value, uint32_t scale);

	// BasicBlock emission.
	BasicBlock *current_bb = nullptr;
	void push_instruction(Instruction *instruction, ir::SsaDef ssa = {});
	bool push_instruction(const ir::Op &op);

	// ir::Builder helpers.
	// Need ways to translate between ir::SsaDef <-> LLVM values for the most part.
	UnorderedMap<ir::SsaDef, Function *> function_map;
	UnorderedMap<ir::SsaDef, BasicBlock *> bb_map;
	UnorderedMap<ir::SsaDef, Type *> param_types;
	Vector<Vector<Value *>> params;
	UnorderedMap<ir::SsaDef, Value *> value_map;

	// Maps stage IO and resources since we need to resolve them back to type + metadata index
	// when loading a descriptor.
	struct StageIOHandler
	{
		uint32_t index = UINT32_MAX;
		DXIL::Op op = DXIL::Op::Count;
		bool need_axis = false;
	};
	UnorderedMap<ir::SsaDef, StageIOHandler> stage_io_map;

	struct StageIOAccess
	{
		Value *axis;
		Value *row;
		uint32_t col;
	};
	StageIOAccess build_stage_io_access(const StageIOHandler &handler, ir::SsaDef io_decl, ir::SsaDef addr);

	struct ResourceHandler
	{
		DXIL::ResourceType resource_type;
		DXIL::ResourceKind resource_kind;
		uint32_t index;
		uint32_t binding_offset; // DXIL is weird.
	};
	UnorderedMap<ir::SsaDef, ResourceHandler> resource_map;

	Type *convert_type(const ir::Type &type);
	BasicBlock *get_basic_block(ir::SsaDef ssa);
	Value *get_value(const ir::Operand &op) const;
	Value *get_value(const ir::SsaDef &op) const;
	bool emit_constant(const ir::Op &op);

	ir::OpFlags global_fp_flags = {};
};

static inline Type *get_value_type(Value *value)
{
	assert(value);
	return value->getType();
}

static Type *get_scalar_type(Type *type)
{
	if (auto *vec = dyn_cast<VectorType>(type))
		return vec->getElementType();
	else if (isa<StructType>(type))
		return type->getStructElementType(0);
	else
		return type;
}

template <typename... Values>
Instruction *ParseContext::build_dxil_call(DXIL::Op op, Type *return_type, Type *overload_type, Values &&... values)
{
	auto *func = dxil_intrinsics.get(
	    module, op, return_type,
	    Vector<Type *> { Type::getInt32Ty(context), get_value_type(values)... },
	    overload_type, tween_id);
	auto *inst = context.construct<CallInst>(
	    func->getFunctionType(), func,
	    Vector<Value *> { get_constant_uint(uint32_t(op)), values... });
	return inst;
}

Instruction *ParseContext::build_dxil_call(DXIL::Op op, Type *return_type, Type *overload_type, Vector<Value *> values)
{
	Vector<Type *> types;
	types.reserve(values.size() + 1);
	types.push_back(Type::getInt32Ty(context));
	for (auto *v : values)
		types.push_back(v->getType());

	auto *func = dxil_intrinsics.get(module, op, return_type, types, overload_type, tween_id);
	values.insert(values.begin(), get_constant_uint(uint32_t(op)));
	auto *inst = context.construct<CallInst>(func->getFunctionType(), func, std::move(values));
	return inst;
}
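// Illustrative expansion of the helper above (assuming a float overload):
//   build_dxil_call(DXIL::Op::Sin, f32_type, f32_type, src)
// emits the equivalent of
//   %r = call float @dx.op.intrinsic(i32 <opcode>, float %src)
// where the leading i32 is the DXIL opcode constant that every dx.op.* call
// takes as its first argument.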
bool ParseContext::emit_constant(const ir::Op &op)
{
	auto &type = op.getType();
	Value *value = nullptr;

	if (type.isBasicType())
	{
		auto *llvm_type = convert_type(type);
		if (type.isScalarType())
		{
			if (type.getBaseType(0).isIntType())
				value = ConstantInt::get(llvm_type, uint64_t(op.getOperand(0)));
			else if (type.getBaseType(0).isFloatType())
				value = ConstantFP::get(llvm_type, uint64_t(op.getOperand(0)));
			else if (type.getBaseType(0).isBoolType())
				value = ConstantInt::get(llvm_type, bool(op.getOperand(0)));
			else
				return false;
		}
		else
		{
			Vector<Constant *> constants;
			constants.reserve(op.getOperandCount());
			auto *llvm_sub_type = get_scalar_type(llvm_type);
			if (type.getBaseType(0).isIntType())
			{
				for (uint32_t i = 0; i < op.getOperandCount(); i++)
					constants.push_back(ConstantInt::get(llvm_sub_type, uint64_t(op.getOperand(i))));
			}
			else if (type.getBaseType(0).isFloatType())
			{
				for (uint32_t i = 0; i < op.getOperandCount(); i++)
					constants.push_back(ConstantFP::get(llvm_sub_type, uint64_t(op.getOperand(i))));
			}
			else
				return false;

			value = context.construct<ConstantAggregate>(convert_type(type), std::move(constants));
		}
	}
	else if (type.isArrayType())
	{
		// This is quite flexible, but only support what we can reasonably expect to see.
		// Extend and generalize if needed.
		auto elem_type = type.getSubType(0);
		if (!elem_type.isScalarType() && !elem_type.isVectorType())
			return false;

		uint32_t vecsize = elem_type.getBaseType(0).getVectorSize();
		assert(vecsize && op.getOperandCount() % vecsize == 0);
		uint32_t array_elements = op.getOperandCount() / vecsize;

		Vector<Constant *> constants;
		Vector<Constant *> values;
		values.reserve(array_elements);
		constants.reserve(vecsize);
		auto *llvm_sub_type = convert_type(elem_type);

		for (uint32_t elem = 0; elem < array_elements; elem++)
		{
			constants.clear();
			for (uint32_t c = 0; c < vecsize; c++)
			{
				if (elem_type.getBaseType(0).isIntType())
					constants.push_back(ConstantInt::get(get_scalar_type(llvm_sub_type), uint64_t(op.getOperand(elem * vecsize + c))));
				else if (elem_type.getBaseType(0).isFloatType())
					constants.push_back(ConstantFP::get(get_scalar_type(llvm_sub_type), uint64_t(op.getOperand(elem * vecsize + c))));
				else
					return false;
			}

			if (elem_type.isVectorType())
				values.push_back(context.construct<ConstantAggregate>(convert_type(elem_type), constants));
			else
				values.push_back(constants[0]);
		}

		auto *constant_value = context.construct<ConstantAggregate>(convert_type(op.getType()), values);
		auto *lut = context.construct<GlobalVariable>(
		    PointerType::get(convert_type(op.getType()), uint32_t(DXIL::AddressSpace::Thread)),
		    GlobalVariable::LinkageTypes::InternalLinkage, false);
		lut->set_initializer(constant_value);
		module.add_global_variable(lut);
		value = lut;
	}

	if (!value)
		return false;
	value_map[op.getDef()] = value;
	return true;
}

Type *ParseContext::convert_type(const ir::Type &type)
{
	if (type.isArrayType())
	{
		auto *llvm_type = convert_type(type.getSubType(0));
		for (unsigned dim = 0; dim < type.getArrayDimensions(); dim++)
			llvm_type = ArrayType::get(llvm_type, type.getArraySize(dim));
		return llvm_type;
	}
	else if (type.isStructType())
	{
		Vector<Type *> members;
		for (unsigned index = 0; index < type.getStructMemberCount(); index++)
			members.push_back(convert_type(type.getSubType(index)));
		return StructType::get(context, std::move(members));
	}
	else if (type.isVoidType())
	{
		return Type::getVoidTy(context);
	}
	else if (type.isBasicType())
	{
		Type *llvm_type;
		ir::BasicType base = type.getBaseType(0);
		switch (base.getBaseType())
		{
		case ir::ScalarType::eF16: llvm_type = Type::getHalfTy(context); break;
		case ir::ScalarType::eF32: llvm_type = Type::getFloatTy(context); break;
		case ir::ScalarType::eF64: llvm_type = Type::getDoubleTy(context); break;
		case ir::ScalarType::eI16:
		case ir::ScalarType::eU16: llvm_type = Type::getInt16Ty(context); break;
		case ir::ScalarType::eI32:
		case ir::ScalarType::eU32: llvm_type = Type::getInt32Ty(context); break;
		case ir::ScalarType::eI64:
		case ir::ScalarType::eU64: llvm_type = Type::getInt64Ty(context); break;
		case ir::ScalarType::eBool: llvm_type = Type::getInt1Ty(context); break;
		default:
			LOGE("Unrecognized basic scalar type %u\n", unsigned(base.getBaseType()));
			return nullptr;
		}

		if (base.isVector())
			llvm_type = VectorType::get(base.getVectorSize(), llvm_type);
		return llvm_type;
	}
	else
	{
		LOGE("Unrecognized type.\n");
		return nullptr;
	}
}

void ParseContext::push_instruction(Instruction *instruction, ir::SsaDef ssa)
{
	assert(current_bb);
	instruction->set_tween_id(++tween_id);
	current_bb->add_instruction(instruction);
	if (ssa)
		value_map[ssa] = instruction;
}
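// Stage IO addressing convention, as consumed below: the address vector is
// [axis,] [row,] column, where the control-point axis only exists for arrayed
// hull/domain IO, the row only for multi-row (arrayed) registers, and the
// trailing column must be a compile-time constant.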
ParseContext::StageIOAccess ParseContext::build_stage_io_access(const StageIOHandler &handler,
                                                                ir::SsaDef io_decl, ir::SsaDef addr)
{
	Value *axis = nullptr;
	Value *row = get_constant_uint(0);
	uint32_t col = 0;

	if (addr)
	{
		auto &decl = builder.getOp(io_decl);
		auto &addr_op = builder.getOp(addr);
		uint32_t chain_length = addr_op.getType().getBaseType(0).getVectorSize();
		uint32_t dim = 0;
		auto *addr_value = get_value(addr);

		if (handler.need_axis)
			axis = get_extracted_composite_component(addr_value, dim++);
		if (dim + 1 == decl.getType().getArrayDimensions())
			row = get_extracted_composite_component(addr_value, dim++);

		// This is optional if we're loading from a scalar, or we're loading the full vector.
		if (dim < chain_length)
		{
			assert(dim == chain_length - 1);
			// The last element is the column. It must be constant.
			if (const auto *c = dyn_cast<ConstantInt>(get_extracted_composite_component(addr_value, dim)))
			{
				col = c->getUniqueInteger().getZExtValue();
			}
			else
			{
				LOGE("Column index is not compile-time constant.\n");
				return {};
			}
		}
	}

	return { axis, row, col };
}

static bool io_decl_is_patch(ir::ShaderStage stage, const ir::Op &op)
{
	if (stage != ir::ShaderStage::eHull && stage != ir::ShaderStage::eDomain)
		return false;

	switch (op.getOpCode())
	{
	case ir::OpCode::eDclInput:
		return stage == ir::ShaderStage::eDomain && !op.getType().isArrayType();
	case ir::OpCode::eDclOutput:
		return stage == ir::ShaderStage::eHull && !op.getType().isArrayType();
	case ir::OpCode::eDclOutputBuiltIn:
		if (stage == ir::ShaderStage::eDomain)
			return false;
		break;
	default:
		break;
	}

	// For builtin IO, there are tess factors and clip/cull distances that are a bit "special".
	auto builtin = ir::BuiltIn(op.getOperand(1));
	if (builtin == ir::BuiltIn::eTessCoord)
		return false;
	return builtin == ir::BuiltIn::eTessFactorOuter ||
	       builtin == ir::BuiltIn::eTessFactorInner ||
	       !op.getType().isArrayType();
}

bool ParseContext::build_input_load(const ir::Op &op)
{
	auto &ref = stage_io_map[ir::SsaDef(op.getOperand(0))];

	// Redirect to magic opcode as needed.
	if (ref.op != DXIL::Op::Count)
	{
		auto *inst = build_load_builtin(ref.op, ir::SsaDef(op.getOperand(1)));
		push_instruction(inst, op.getDef());
		return true;
	}

	auto *type = convert_type(op.getType());
	auto *scalar_type = type;
	unsigned components = 1;
	if (const auto *vec = llvm::dyn_cast<VectorType>(type))
	{
		components = vec->getVectorSize();
		scalar_type = vec->getElementType();
	}

	Instruction *insts[4] = {};
	auto access = build_stage_io_access(ref, ir::SsaDef(op.getOperand(0)), ir::SsaDef(op.getOperand(1)));
	bool patch = io_decl_is_patch(shader_stage, builder.getOp(ir::SsaDef(op.getOperand(0))));

	for (unsigned c = 0; c < components; c++)
	{
		insts[c] = build_load_input(ref.index, scalar_type, access.row, access.col + c, access.axis, patch);
		push_instruction(insts[c], op.getDef());
	}

	if (components != 1)
	{
		auto *inst = context.construct<CompositeConstructInst>(type, Vector<Value *>{ insts, insts + components });
		push_instruction(inst, op.getDef());
	}
	return true;
}

bool ParseContext::build_output_load(const ir::Op &op)
{
	auto &ref = stage_io_map[ir::SsaDef(op.getOperand(0))];
	auto *type = convert_type(op.getType());
	auto *scalar_type = type;
	unsigned components = 1;
	if (const auto *vec = llvm::dyn_cast<VectorType>(type))
	{
		components = vec->getVectorSize();
		scalar_type = vec->getElementType();
	}

	Instruction *insts[4] = {};
	auto access = build_stage_io_access(ref, ir::SsaDef(op.getOperand(0)), ir::SsaDef(op.getOperand(1)));
	bool patch = io_decl_is_patch(shader_stage, builder.getOp(ir::SsaDef(op.getOperand(0))));

	for (unsigned c = 0; c < components; c++)
	{
		insts[c] = build_load_output(ref.index, scalar_type, access.row, access.col + c, access.axis, patch);
		push_instruction(insts[c], op.getDef());
	}

	if (components != 1)
	{
		auto *inst = context.construct<CompositeConstructInst>(type, Vector<Value *>{ insts, insts + components });
		push_instruction(inst, op.getDef());
	}
	return true;
}
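// DXIL stage IO intrinsics are scalar, so vector loads are emitted one
// component at a time and then reassembled into the vector value the ir
// expects; the interpolation helpers below follow the same pattern.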
bool ParseContext::build_interpolate_at_centroid(const ir::Op &op)
{
	auto &ref = stage_io_map[ir::SsaDef(op.getOperand(0))];
	auto *type = convert_type(op.getType());
	auto *scalar_type = type;
	unsigned components = 1;
	if (const auto *vec = llvm::dyn_cast<VectorType>(type))
	{
		components = vec->getVectorSize();
		scalar_type = vec->getElementType();
	}

	Instruction *insts[4] = {};
	auto access = build_stage_io_access(ref, ir::SsaDef(op.getOperand(0)), ir::SsaDef(op.getOperand(1)));
	for (unsigned c = 0; c < components; c++)
	{
		insts[c] = build_dxil_call(DXIL::Op::EvalCentroid, scalar_type, scalar_type,
		                           get_constant_uint(ref.index), access.row, get_constant_uint(access.col + c));
		push_instruction(insts[c], op.getDef());
	}

	if (components != 1)
	{
		auto *inst = context.construct<CompositeConstructInst>(type, Vector<Value *>{ insts, insts + components });
		push_instruction(inst, op.getDef());
	}
	return true;
}

bool ParseContext::build_interpolate_at_sample(const ir::Op &op)
{
	auto &ref = stage_io_map[ir::SsaDef(op.getOperand(0))];
	auto *type = convert_type(op.getType());
	auto *scalar_type = type;
	unsigned components = 1;
	if (const auto *vec = llvm::dyn_cast<VectorType>(type))
	{
		components = vec->getVectorSize();
		scalar_type = vec->getElementType();
	}

	Instruction *insts[4] = {};
	auto access = build_stage_io_access(ref, ir::SsaDef(op.getOperand(0)), ir::SsaDef(op.getOperand(1)));
	for (unsigned c = 0; c < components; c++)
	{
		insts[c] = build_dxil_call(DXIL::Op::EvalSampleIndex, scalar_type, scalar_type,
		                           get_constant_uint(ref.index), access.row, get_constant_uint(access.col + c),
		                           get_value(op.getOperand(2)));
		push_instruction(insts[c], op.getDef());
	}

	if (components != 1)
	{
		auto *inst = context.construct<CompositeConstructInst>(type, Vector<Value *>{ insts, insts + components });
		push_instruction(inst, op.getDef());
	}
	return true;
}

bool ParseContext::build_interpolate_at_offset(const ir::Op &op)
{
	auto &ref = stage_io_map[ir::SsaDef(op.getOperand(0))];
	auto *type = convert_type(op.getType());
	auto *scalar_type = type;
	unsigned components = 1;
	if (const auto *vec = llvm::dyn_cast<VectorType>(type))
	{
		components = vec->getVectorSize();
		scalar_type = vec->getElementType();
	}

	Instruction *insts[4] = {};
	auto access = build_stage_io_access(ref, ir::SsaDef(op.getOperand(0)), ir::SsaDef(op.getOperand(1)));
	for (unsigned c = 0; c < components; c++)
	{
		insts[c] = build_dxil_call(DXIL::Op::ExtendedEvalSnapped, scalar_type, scalar_type,
		                           get_constant_uint(ref.index), access.row, get_constant_uint(access.col + c),
		                           get_value(op.getOperand(2)));
		push_instruction(insts[c], op.getDef());
	}

	if (components != 1)
	{
		auto *inst = context.construct<CompositeConstructInst>(type, Vector<Value *>{ insts, insts + components });
		push_instruction(inst, op.getDef());
	}
	return true;
}

bool ParseContext::build_output_store(const ir::Op &op)
{
	auto *store_value = get_value(op.getOperand(2));
	auto &ref = stage_io_map[ir::SsaDef(op.getOperand(0))];
	unsigned components = 1;
	if (const auto *vec = llvm::dyn_cast<VectorType>(store_value->getType()))
		components = vec->getVectorSize();

	auto access = build_stage_io_access(ref, ir::SsaDef(op.getOperand(0)), ir::SsaDef(op.getOperand(1)));
	bool patch = io_decl_is_patch(shader_stage, builder.getOp(ir::SsaDef(op.getOperand(0))));

	if (components == 1)
	{
		push_instruction(build_store_output(ref.index, access.row, access.col, store_value, patch));
	}
	else
	{
		for (unsigned c = 0; c < components; c++)
		{
			auto *value = get_extracted_composite_component(store_value, c);
			push_instruction(build_store_output(ref.index, access.row, access.col + c, value, patch));
		}
	}
	return true;
}
bool ParseContext::build_gep_load(const ir::Op &op)
{
	auto *type = convert_type(op.getType());
	Vector<Value *> args;
	args.push_back(get_value(op.getOperand(0)));
	args.push_back(get_constant_uint(0));

	if (op.getOperand(1))
	{
		auto &addr = builder.getOp(ir::SsaDef(op.getOperand(1)));
		auto *addr_value = get_value(op.getOperand(1));
		for (uint32_t i = 0; i < addr.getType().getBaseType(0).getVectorSize(); i++)
			args.push_back(get_extracted_composite_component(addr_value, i));
	}

	// LDS lives in the group-shared address space; scratch and constant data in the thread-local one.
	auto addr_space = op.getOpCode() == ir::OpCode::eLdsLoad ?
	                  DXIL::AddressSpace::GroupShared : DXIL::AddressSpace::Thread;
	auto *gep = context.construct<GetElementPtrInst>(
	    PointerType::get(type, uint32_t(addr_space)), std::move(args), true);
	auto *load = context.construct<LoadInst>(type, gep);
	push_instruction(gep);
	push_instruction(load, op.getDef());
	return true;
}

bool ParseContext::build_gep_store(const ir::Op &op)
{
	auto *type = convert_type(builder.getOp(ir::SsaDef(op.getOperand(2))).getType());
	Vector<Value *> args;
	args.push_back(get_value(op.getOperand(0)));
	args.push_back(get_constant_uint(0));

	if (op.getOperand(1))
	{
		auto &addr = builder.getOp(ir::SsaDef(op.getOperand(1)));
		auto *addr_value = get_value(op.getOperand(1));
		for (uint32_t i = 0; i < addr.getType().getBaseType(0).getVectorSize(); i++)
			args.push_back(get_extracted_composite_component(addr_value, i));
	}

	auto addr_space = op.getOpCode() == ir::OpCode::eLdsStore ?
	                  DXIL::AddressSpace::GroupShared : DXIL::AddressSpace::Thread;
	auto *gep = context.construct<GetElementPtrInst>(
	    PointerType::get(type, uint32_t(addr_space)), std::move(args), true);
	auto *store = context.construct<StoreInst>(gep, get_value(op.getOperand(2)));
	push_instruction(gep);
	push_instruction(store, op.getDef());
	return true;
}

bool ParseContext::build_composite_construct(const ir::Op &op)
{
	auto *type = convert_type(op.getType());
	Vector<Value *> values;
	values.reserve(op.getOperandCount());
	for (unsigned i = 0; i < op.getOperandCount(); i++)
		values.push_back(get_value(op.getOperand(i)));
	auto *inst = context.construct<CompositeConstructInst>(type, std::move(values));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_composite_extract(const ir::Op &op)
{
	auto &address = builder.getOpForOperand(op, 1);
	if (!address.isConstant())
	{
		LOGE("CompositeExtract must take a constant index.\n");
		return false;
	}

	auto *value = get_value(op.getOperand(0));
	for (unsigned i = 0; i < address.getOperandCount(); i++)
		value = get_extracted_composite_component(value, uint32_t(address.getOperand(i)));
	value_map[op.getDef()] = value;
	return true;
}

bool ParseContext::build_composite_insert(const ir::Op &op)
{
	auto *inst = context.construct<InsertElementInst>(
	    get_value(op.getOperand(0)), get_value(op.getOperand(2)), get_value(op.getOperand(1)));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_descriptor_load(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &dcl_op = builder.getOp(descriptor);
	// For counter declarations, load the descriptor of the UAV they belong to.
	if (dcl_op.getOpCode() == ir::OpCode::eDclUavCounter)
		descriptor = ir::SsaDef(dcl_op.getOperand(1));

	auto *inst = build_descriptor_load(descriptor, ir::SsaDef(op.getOperand(1)),
	                                   bool(op.getFlags() & ir::OpFlag::eNonUniform));
	if (!inst)
		return false;
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_deriv(const ir::Op &op)
{
	auto *inst = build_dxil_call(DXIL::Op::ExtendedDeriv,
	                             convert_type(op.getType()), convert_type(op.getType()),
	                             get_value(op.getOperand(0)),
	                             get_constant_uint(op.getOpCode() == ir::OpCode::eDerivY),
	                             get_constant_uint(uint32_t(op.getOperand(1))));
	push_instruction(inst, op.getDef());
	return true;
}
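// CheckAccessFullyMapped consumes the residency status produced by sparse
// resource accesses; the ir op is forwarded more or less verbatim.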
bool ParseContext::build_check_sparse_access(const ir::Op &op)
{
	auto *inst = build_dxil_call(DXIL::Op::CheckAccessFullyMapped,
	                             convert_type(op.getType()), convert_type(op.getType()),
	                             get_value(op.getOperand(0)));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_fround(const ir::Op &op)
{
	auto dxop = convert_round_mode(ir::RoundMode(op.getOperand(op.getFirstLiteralOperandIndex())));
	auto *inst = build_dxil_call(dxop, convert_type(op.getType()), convert_type(op.getType()),
	                             get_value(op.getOperand(0)));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_frcp(const ir::Op &op)
{
	Constant *const1;
	// ConstantFP::get() takes the raw bit pattern of the float here.
	switch (op.getType().getBaseType(0).getBaseType())
	{
	case ir::ScalarType::eF16:
		// 0x3c00 is 1.0 in FP16.
		const1 = ConstantFP::get(Type::getHalfTy(context), 0x3c00);
		break;

	case ir::ScalarType::eF32:
	{
		const float one = 1.0f;
		uint32_t v;
		memcpy(&v, &one, sizeof(one));
		const1 = ConstantFP::get(Type::getFloatTy(context), v);
		break;
	}

	case ir::ScalarType::eF64:
	{
		const double one = 1.0;
		uint64_t v;
		memcpy(&v, &one, sizeof(one));
		const1 = ConstantFP::get(Type::getDoubleTy(context), v);
		break;
	}

	default:
		return false;
	}

	if (op.getType().isVectorType())
	{
		unsigned num_components = op.getType().getBaseType(0).getVectorSize();
		Vector<Constant *> values;
		values.reserve(num_components);
		for (unsigned i = 0; i < num_components; i++)
			values.push_back(const1);
		const1 = context.construct<ConstantAggregate>(VectorType::get(num_components, const1->getType()), std::move(values));
	}

	auto *inst = context.construct<BinaryOperator>(const1, get_value(op.getOperand(0)), BinaryOperator::BinaryOps::FDiv);
	inst->setFast(!((op.getFlags() | global_fp_flags) & ir::OpFlag::ePrecise));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_binary_op(const ir::Op &op, BinaryOperator::BinaryOps binop)
{
	auto *inst = context.construct<BinaryOperator>(get_value(op.getOperand(0)), get_value(op.getOperand(1)), binop);
	push_instruction(inst, op.getDef());
	if (op.getType().getBaseType(0).isFloatType())
		inst->setFast(!((op.getFlags() | global_fp_flags) & ir::OpFlag::ePrecise));
	return true;
}

template <DXIL::Op dxop>
bool ParseContext::build_dxil_unary(const ir::Op &op)
{
	assert(op.getOperandCount() == 1);
	auto *inst = build_dxil_call(dxop, convert_type(op.getType()), convert_type(op.getType()),
	                             get_value(op.getOperand(0)));
	if ((op.getFlags() | global_fp_flags) & ir::OpFlag::ePrecise)
		inst->setMetadata("dx.precise", create_md_node(create_null_meta()));
	push_instruction(inst, op.getDef());
	return true;
}

template <DXIL::Op dxop>
bool ParseContext::build_dxil_constant_unary(const ir::Op &op)
{
	assert(op.getOperandCount() == 1);
	auto *inst = build_dxil_call(dxop, convert_type(op.getType()), convert_type(op.getType()),
	                             get_constant_uint(uint32_t(op.getOperand(0))));
	if ((op.getFlags() | global_fp_flags) & ir::OpFlag::ePrecise)
		inst->setMetadata("dx.precise", create_md_node(create_null_meta()));
	push_instruction(inst, op.getDef());
	return true;
}

template <DXIL::Op dxop>
bool ParseContext::build_dxil_binary(const ir::Op &op)
{
	assert(op.getOperandCount() == 2);
	auto *inst = build_dxil_call(dxop, convert_type(op.getType()), convert_type(op.getType()),
	                             get_value(op.getOperand(0)), get_value(op.getOperand(1)));
	if ((op.getFlags() | global_fp_flags) & ir::OpFlag::ePrecise)
		inst->setMetadata("dx.precise", create_md_node(create_null_meta()));
	push_instruction(inst, op.getDef());
	return true;
}
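// Precision handling in these helpers: plain binary ops get LLVM fast-math
// flags when the ir op (or the module-wide default) is not marked ePrecise,
// while dx.op intrinsic calls are tagged with dx.precise metadata when it is.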
template <DXIL::Op dxop>
bool ParseContext::build_dxil_trinary(const ir::Op &op)
{
	assert(op.getOperandCount() == 3);
	auto *inst = build_dxil_call(dxop, convert_type(op.getType()), convert_type(op.getType()),
	                             get_value(op.getOperand(0)), get_value(op.getOperand(1)), get_value(op.getOperand(2)));
	if ((op.getFlags() | global_fp_flags) & ir::OpFlag::ePrecise)
		inst->setMetadata("dx.precise", create_md_node(create_null_meta()));
	push_instruction(inst, op.getDef());
	return true;
}

template <DXIL::Op dxop>
bool ParseContext::build_dxil_quaternary(const ir::Op &op)
{
	assert(op.getOperandCount() == 4);
	auto *inst = build_dxil_call(dxop, convert_type(op.getType()), convert_type(op.getType()),
	                             get_value(op.getOperand(0)), get_value(op.getOperand(1)),
	                             get_value(op.getOperand(2)), get_value(op.getOperand(3)));
	if ((op.getFlags() | global_fp_flags) & ir::OpFlag::ePrecise)
		inst->setMetadata("dx.precise", create_md_node(create_null_meta()));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_barrier(const ir::Op &op)
{
	auto exec_scope = ir::Scope(op.getOperand(0));
	auto mem_scope = ir::Scope(op.getOperand(1));
	auto memory_type = ir::MemoryTypeFlags(op.getOperand(2));
	auto *void_type = Type::getVoidTy(context);

	uint32_t memory_flags = 0;
	uint32_t semantic_flags = 0;
	semantic_flags |= DXIL::GroupScopeBit;
	if (exec_scope != ir::Scope::eThread)
		semantic_flags |= DXIL::GroupSyncBit;
	if (mem_scope == ir::Scope::eGlobal)
		semantic_flags |= DXIL::DeviceScopeBit;
	if (memory_type & ir::MemoryType::eLds)
		memory_flags |= DXIL::MemoryTypeGroupSharedBit;
	if (memory_type & ir::MemoryType::eUav)
		memory_flags |= DXIL::MemoryTypeUavBit;

	auto *inst = build_dxil_call(DXIL::Op::BarrierByMemoryType, void_type, void_type,
	                             get_constant_uint(memory_flags), get_constant_uint(semantic_flags));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_demote(const ir::Op &op)
{
	auto *void_type = Type::getVoidTy(context);
	auto *inst = build_dxil_call(DXIL::Op::Discard, void_type, void_type, get_constant_uint(1));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::push_instruction(const ir::Op &op)
{
	switch (op.getOpCode())
	{
#define OPMAP(irop, llvmop) \
	case ir::OpCode::e##irop: \
		if (!build_##llvmop(op)) \
			return false; \
		break
		OPMAP(InputLoad, input_load);
		OPMAP(OutputLoad, output_load);
		OPMAP(OutputStore, output_store);
		OPMAP(CompositeConstruct, composite_construct);
		OPMAP(CompositeExtract, composite_extract);
		OPMAP(CompositeInsert, composite_insert);
		OPMAP(DescriptorLoad, descriptor_load);
		OPMAP(BufferLoad, buffer_load);
		OPMAP(BufferStore, buffer_store);
		OPMAP(BufferAtomic, buffer_atomic);
		OPMAP(CounterAtomic, counter_atomic);
		OPMAP(BufferQuerySize, buffer_query_size);
		OPMAP(ImageLoad, image_load);
		OPMAP(ImageStore, image_store);
		OPMAP(ImageAtomic, image_atomic);
		OPMAP(ImageQuerySize, image_query_size);
		OPMAP(ImageQueryMips, image_query_mips_samples);
		OPMAP(ImageQuerySamples, image_query_mips_samples);
		OPMAP(ImageSample, image_sample);
		OPMAP(ImageGather, image_gather);
		OPMAP(ImageComputeLod, image_compute_lod);
		OPMAP(DerivX, deriv);
		OPMAP(DerivY, deriv);
		OPMAP(CheckSparseAccess, check_sparse_access);
		OPMAP(FRound, fround);
		OPMAP(FAbs, dxil_unary);
		OPMAP(IAbs, dxil_unary);
		OPMAP(FMad, dxil_trinary);
		OPMAP(FRcp, frcp);
		OPMAP(FFract, dxil_unary);
		OPMAP(FMin, dxil_binary);
		OPMAP(FMax, dxil_binary);
		OPMAP(SMin, dxil_binary);
		OPMAP(SMax, dxil_binary);
		OPMAP(UMin, dxil_binary);
		OPMAP(UMax, dxil_binary);
		OPMAP(FClamp, dxil_trinary);
		OPMAP(SClamp, dxil_trinary);
		OPMAP(UClamp, dxil_trinary);
		OPMAP(FLog2, dxil_unary);
		OPMAP(FExp2, dxil_unary);
		OPMAP(FSin, dxil_unary);
		OPMAP(FCos, dxil_unary);
		OPMAP(FSqrt, dxil_unary);
		OPMAP(FRsq, dxil_unary);
		OPMAP(FPow, dxil_binary);
		OPMAP(FIsNan, dxil_unary);
		OPMAP(ConvertF32toPackedF16, dxil_unary);
		OPMAP(ConvertPackedF16toF32, dxil_unary);
	OPMAP(InterpolateAtCentroid, interpolate_at_centroid);
	OPMAP(InterpolateAtSample, interpolate_at_sample);
	OPMAP(InterpolateAtOffset, interpolate_at_offset);
	OPMAP(UBitExtract, dxil_trinary);
	OPMAP(SBitExtract, dxil_trinary);
	OPMAP(IBitInsert, dxil_quaternary);
	OPMAP(EmitVertex, dxil_constant_unary);
	OPMAP(EmitPrimitive, dxil_constant_unary);
	OPMAP(IBitCount, dxil_unary);
	OPMAP(IBitReverse, dxil_unary);
	OPMAP(IFindLsb, dxil_unary);
	OPMAP(SFindMsb, dxil_unary);
	OPMAP(UFindMsb, dxil_unary);
	OPMAP(IAddCarry, dxil_binary);
	OPMAP(ISubBorrow, dxil_binary);
	OPMAP(SMulExtended, dxil_binary);
	OPMAP(UMulExtended, dxil_binary);
	OPMAP(ScratchLoad, gep_load);
	OPMAP(ScratchStore, gep_store);
	OPMAP(LdsLoad, gep_load);
	OPMAP(LdsStore, gep_store);
	OPMAP(ConstantLoad, gep_load);
	OPMAP(Barrier, barrier);
	OPMAP(LdsAtomic, lds_atomic);
	OPMAP(Demote, demote);
#undef OPMAP

	// Plain instructions
	case ir::OpCode::eCast:
	{
		if (convert_type(op.getType()) == convert_type(builder.getOp(ir::SsaDef(op.getOperand(0))).getType()))
		{
			// I <-> U casts are meaningless.
			value_map[op.getDef()] = get_value(op.getOperand(0));
		}
		else
		{
			push_instruction(context.construct<CastInst>(convert_type(op.getType()), get_value(op.getOperand(0)),
			                                             Instruction::CastOps::BitCast),
			                 op.getDef());
		}
		break;
	}

	case ir::OpCode::eSelect:
	{
		push_instruction(context.construct<SelectInst>(get_value(op.getOperand(1)), get_value(op.getOperand(2)),
		                                               get_value(op.getOperand(0))),
		                 op.getDef());
		break;
	}

	case ir::OpCode::eFNeg:
	case ir::OpCode::eINeg:
	{
		push_instruction(context.construct<UnaryOperator>(
		                     op.getOpCode() == ir::OpCode::eFNeg ? UnaryOperator::UnaryOps::FNeg : UnaryOperator::UnaryOps::INeg,
		                     get_value(op.getOperand(0))),
		                 op.getDef());
		break;
	}

#define CMP(irop, type, llvmop) \
	case ir::OpCode::irop: \
		push_instruction(context.construct<type##CmpInst>( \
			CmpInst::Predicate::llvmop, \
			get_value(op.getOperand(0)), \
			get_value(op.getOperand(1))), \
			op.getDef()); break
	CMP(eFNe, F, FCMP_UNE);
	CMP(eFEq, F, FCMP_OEQ);
	CMP(eFGt, F, FCMP_OGT);
	CMP(eFGe, F, FCMP_OGE);
	CMP(eFLt, F, FCMP_OLT);
	CMP(eFLe, F, FCMP_OLE);
	CMP(eINe, I, ICMP_NE);
	CMP(eIEq, I, ICMP_EQ);
	CMP(eBNe, I, ICMP_NE);
	CMP(eBEq, I, ICMP_EQ);
	CMP(eSGt, I, ICMP_SGT);
	CMP(eSGe, I, ICMP_SGE);
	CMP(eSLt, I, ICMP_SLT);
	CMP(eSLe, I, ICMP_SLE);
	CMP(eUGt, I, ICMP_UGT);
	CMP(eUGe, I, ICMP_UGE);
	CMP(eULt, I, ICMP_ULT);
	CMP(eULe, I, ICMP_ULE);

#define BOP(irop, llvmop) case ir::OpCode::irop: if (!build_binary_op(op, BinaryOperator::BinaryOps::llvmop)) return false; break
	BOP(eFAdd, FAdd);
	BOP(eFSub, FSub);
	BOP(eFMul, FMul);
	BOP(eFDiv, FDiv);
	BOP(eIAdd, Add);
	BOP(eISub, Sub);
	BOP(eIMul, Mul);
	BOP(eUDiv, UDiv);
	BOP(eUMod, URem);
	BOP(eIAnd, And);
	BOP(eIOr, Or);
	BOP(eIXor, Xor);
	BOP(eBAnd, And);
	BOP(eBOr, Or);
	BOP(eIShl, Shl);
	BOP(eUShr, LShr);
	BOP(eSShr, AShr);
#undef BOP

	case ir::OpCode::eConvertFtoF:
	{
		auto &out_type = op.getType();
		auto &in_type = builder.getOp(ir::SsaDef(op.getOperand(0))).getType();
		if (out_type.byteSize() == in_type.byteSize())
		{
			value_map[op.getDef()] = get_value(op.getOperand(0));
			break;
		}

		bool ext = out_type.byteSize() > in_type.byteSize();
		auto *inst = context.construct<CastInst>(convert_type(out_type), get_value(op.getOperand(0)),
		                                         ext ? Instruction::CastOps::FPExt : Instruction::CastOps::FPTrunc);
		push_instruction(inst, op.getDef());
		break;
	}

	case ir::OpCode::eConvertFtoI:
	{
		auto &out_type = op.getType();
		bool is_signed = out_type.getBaseType(0).isSignedIntType();
		auto *inst = context.construct<CastInst>(convert_type(out_type), get_value(op.getOperand(0)), is_signed ?
		                                         Instruction::CastOps::FPToSI : Instruction::CastOps::FPToUI);
		push_instruction(inst, op.getDef());
		break;
	}

	case ir::OpCode::eConvertItoF:
	{
		auto &out_type = op.getType();
		auto &in_type = builder.getOp(ir::SsaDef(op.getOperand(0))).getType();
		bool is_signed = in_type.getBaseType(0).isSignedIntType();
		auto *inst = context.construct<CastInst>(convert_type(out_type), get_value(op.getOperand(0)),
		                                         is_signed ? Instruction::CastOps::SIToFP : Instruction::CastOps::UIToFP);
		push_instruction(inst, op.getDef());
		break;
	}

	case ir::OpCode::eConvertItoI:
	{
		auto &out_type = op.getType();
		auto &in_type = builder.getOp(ir::SsaDef(op.getOperand(0))).getType();
		bool is_signed = in_type.getBaseType(0).isSignedIntType();
		bool ext = out_type.byteSize() > in_type.byteSize();

		if (out_type.byteSize() == in_type.byteSize())
		{
			value_map[op.getDef()] = get_value(op.getOperand(0));
			break;
		}

		if (!ext)
		{
			auto *inst = context.construct<CastInst>(convert_type(out_type), get_value(op.getOperand(0)), Instruction::CastOps::Trunc);
			push_instruction(inst, op.getDef());
		}
		else
		{
			auto *inst = context.construct<CastInst>(convert_type(out_type), get_value(op.getOperand(0)),
			                                         is_signed ? Instruction::CastOps::SExt : Instruction::CastOps::ZExt);
			push_instruction(inst, op.getDef());
		}
		break;
	}

	case ir::OpCode::eINot:
	case ir::OpCode::eBNot:
	{
		auto *result_type = convert_type(op.getType());
		auto *scalar_type = result_type;
		Constant *constant_max;

		if (const auto *vec = llvm::dyn_cast<VectorType>(result_type))
		{
			scalar_type = vec->getElementType();
			constant_max = ConstantInt::get(scalar_type, UINT64_MAX);
			// Only vec2 is supported.
			constant_max = context.construct(result_type, Vector{ constant_max, constant_max });
		}
		else
		{
			constant_max = ConstantInt::get(scalar_type, UINT64_MAX);
		}

		auto *inst = context.construct<BinaryOperator>(get_value(op.getOperand(0)), constant_max, Instruction::BinaryOps::Xor);
		push_instruction(inst, op.getDef());
		break;
	}

	case ir::OpCode::eFunctionCall:
	{
		auto itr = function_map.find(ir::SsaDef(op.getOperand(0)));
		if (itr == function_map.end())
			return false;
		auto *func = itr->second;

		Vector<Value *> args;
		args.reserve(op.getOperandCount() - 1);
		for (uint32_t i = 1; i < op.getOperandCount(); i++)
			args.push_back(get_value(op.getOperand(i)));

		push_instruction(context.construct<CallInst>(func->getFunctionType(), func, std::move(args)), op.getDef());
		break;
	}

	default:
		LOGE("Unimplemented opcode %u\n", unsigned(op.getOpCode()));
		return false;
	}

	return true;
}

Instruction *ParseContext::build_load_input(uint32_t index, Type *type, Value *row, uint32_t col, Value *axis, bool patch)
{
	assert(index != UINT32_MAX);
	auto *inst = build_dxil_call(patch ? DXIL::Op::LoadPatchConstant : DXIL::Op::ExtendedSpirvLoadInput, type, type,
	                             get_constant_uint(index), row, get_constant_uint(col),
	                             axis ? axis : UndefValue::get(Type::getInt32Ty(context)));
	return inst;
}

Instruction *ParseContext::build_load_output(uint32_t index, Type *type, Value *row, uint32_t col, Value *axis, bool patch)
{
	assert(index != UINT32_MAX);
	Instruction *inst;

	// This is slightly extended internally to allow loading outputs in general.
	if (patch)
	{
		inst = build_dxil_call(DXIL::Op::LoadPatchConstant, type, type,
		                       get_constant_uint(index), row, get_constant_uint(col));
	}
	else
	{
		inst = build_dxil_call(DXIL::Op::LoadOutputControlPoint, type, type,
		                       get_constant_uint(index), row, get_constant_uint(col), axis ?
		                       axis : UndefValue::get(Type::getInt32Ty(context)));
	}

	return inst;
}

Instruction *ParseContext::build_store_output(uint32_t index, Value *row, uint32_t col, Value *value, bool patch)
{
	assert(index != UINT32_MAX);
	auto *inst = build_dxil_call(patch ? DXIL::Op::StorePatchConstant : DXIL::Op::StoreOutput,
	                             Type::getVoidTy(context), value->getType(),
	                             get_constant_uint(index), row, get_constant_uint(col), value);
	return inst;
}

Instruction *ParseContext::build_load_builtin(DXIL::Op opcode, ir::SsaDef addr)
{
	Type *type;
	if (opcode == DXIL::Op::InnerCoverage)
		type = Type::getInt1Ty(context);
	else
		type = Type::getInt32Ty(context);

	if (addr)
		return build_dxil_call(opcode, type, nullptr, get_value(addr));
	else
		return build_dxil_call(opcode, type, nullptr);
}

Value *ParseContext::get_extracted_composite_component(Value *value, unsigned component)
{
	if (!isa<VectorType>(value->getType()) && !isa<StructType>(value->getType()))
	{
		assert(component == 0);
		return value;
	}

	// Common pattern where composites are constructed only to be extracted again.
	if (const auto *comp = dyn_cast(value))
		return comp->getOperand(component);
	if (const auto *vec = dyn_cast<ConstantDataVector>(value))
		return vec->getElementAsConstant(component);

	ExtractValueInst *extracted;
	if (const auto *vec_type = dyn_cast<VectorType>(value->getType()))
		extracted = context.construct<ExtractValueInst>(vec_type->getElementType(), value, Vector{ component });
	else if (const auto *struct_type = dyn_cast<StructType>(value->getType()))
		extracted = context.construct<ExtractValueInst>(struct_type->getStructElementType(component), value, Vector{ component });
	else
		return nullptr;

	push_instruction(extracted);
	return extracted;
}

Value *ParseContext::get_constant_mul(Value *value, uint32_t scale)
{
	// If there is already a multiplier, fold it in to help dxil-spirv analysis get proper vectorization.
	if (const auto *cint = dyn_cast<ConstantInt>(value))
	{
		return get_constant_uint(cint->getUniqueInteger().getZExtValue() * scale);
	}
	else if (const auto *bop = dyn_cast<BinaryOperator>(value))
	{
		if (bop->getOpcode() == BinaryOperator::BinaryOps::Mul)
		{
			auto *ca = dyn_cast<ConstantInt>(bop->getOperand(0));
			auto *cb = dyn_cast<ConstantInt>(bop->getOperand(1));
			if (ca && cb)
			{
				return get_constant_uint(ca->getUniqueInteger().getZExtValue() *
				                         cb->getUniqueInteger().getZExtValue() * scale);
			}
			else if (ca || cb)
			{
				auto *c = ca ? ca : cb;
				auto *other = bop->getOperand(ca ? 1 : 0);
				auto *inst = context.construct<BinaryOperator>(
					get_constant_uint(c->getUniqueInteger().getZExtValue() * scale),
					other, BinaryOperator::BinaryOps::Mul);
				push_instruction(inst);
				return inst;
			}
		}
		else if (bop->getOpcode() == BinaryOperator::BinaryOps::Add)
		{
			if (isa<ConstantInt>(bop->getOperand(0)) || isa<ConstantInt>(bop->getOperand(1)))
			{
				// Avoid nested scaling. Scale each side. Probably only worth it if at least one of them is a constant.
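				// e.g. with scale = 16: (x * 2 + 4) becomes (x * 32 + 64) rather than
				// ((x * 2 + 4) * 16), which keeps the byte offset in a shape the
				// downstream vectorization analysis can pattern-match.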
				auto *scaled_a = get_constant_mul(bop->getOperand(0), scale);
				auto *scaled_b = get_constant_mul(bop->getOperand(1), scale);
				auto *inst = context.construct<BinaryOperator>(scaled_a, scaled_b, BinaryOperator::BinaryOps::Add);
				push_instruction(inst);
				return inst;
			}
		}
	}

	auto *inst = context.construct<BinaryOperator>(get_constant_uint(scale), value, BinaryOperator::BinaryOps::Mul);
	push_instruction(inst);
	return inst;
}

static VectorType *get_vec4_variant(Type *type)
{
	if (auto *vec = dyn_cast<VectorType>(type))
	{
		if (vec->getVectorSize() == 4)
			return vec;
		else
			return VectorType::get(4, vec->getElementType());
	}
	else
		return VectorType::get(4, type);
}

static StructType *get_sparse_feedback_variant(Type *type)
{
	auto *scalar_type = get_scalar_type(type->getStructElementType(1));
	return StructType::get(type->getContext(),
	                       { scalar_type, scalar_type, scalar_type, scalar_type,
	                         Type::getInt32Ty(type->getContext()) });
}

static Type *get_composite_return_type(Type *type)
{
	if (isa<StructType>(type))
		return get_sparse_feedback_variant(type);
	else
		return get_vec4_variant(type);
}

Instruction *ParseContext::build_extract_composite(const ir::Op &op, Value *value, unsigned num_elements)
{
	if (!num_elements)
		num_elements = op.getType().getBaseType(0).getVectorSize();

	if (num_elements == 1)
		return context.construct<ExtractValueInst>(get_scalar_type(value->getType()), value, Vector{ 0 });

	Value *values[4];
	for (unsigned c = 0; c < num_elements; c++)
		values[c] = get_extracted_composite_component(value, c);

	assert(num_elements > 1);
	auto *result_type = VectorType::get(num_elements, get_scalar_type(value->getType()));
	auto *comp = context.construct(result_type, Vector{ values, values + num_elements });
	return comp;
}

bool ParseContext::build_buffer_load_return_composite(const ir::Op &op, Value *value)
{
	if (op.getType().isStructType())
	{
		// Sparse feedback.
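		// Sparse loads return { residency code, data } in the ir; the DXIL side packs it as
		// 4 scalars + i32 status (see get_sparse_feedback_variant() above), so element 4 is
		// the residency code and the leading elements hold the loaded data.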
		auto *code = get_extracted_composite_component(value, 4);
		auto *sampled_value = build_extract_composite(op, value, op.getType().getBaseType(1).getVectorSize());
		push_instruction(sampled_value);
		auto *inst = context.construct(convert_type(op.getType()), Vector<Value *>{ code, sampled_value });
		push_instruction(inst, op.getDef());
		return true;
	}
	else
	{
		unsigned num_elements = op.getType().getBaseType(0).getVectorSize();
		if (num_elements != 1)
			push_instruction(build_extract_composite(op, value, num_elements), op.getDef());
		else
			value_map[op.getDef()] = get_extracted_composite_component(value, 0);
	}

	return true;
}

bool ParseContext::build_buffer_load(const ir::Op &op, DXIL::ResourceKind kind)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto *int_type = Type::getInt32Ty(context);
	auto *addr_value = get_value(op.getOperand(1));
	Value *first;
	Value *second;

	if (kind == DXIL::ResourceKind::StructuredBuffer)
	{
		first = get_extracted_composite_component(addr_value, 0);
		second = get_extracted_composite_component(addr_value, 1);
		second = get_constant_mul(second, 4);
	}
	else
	{
		first = addr_value;
		if (kind == DXIL::ResourceKind::RawBuffer)
			first = get_constant_mul(first, 4);
		second = UndefValue::get(int_type);
	}

	auto *result_type = convert_type(op.getType());
	auto *dxil_result_type = get_composite_return_type(result_type);
	auto *inst = build_dxil_call(DXIL::Op::BufferLoad, dxil_result_type, dxil_result_type,
	                             get_value(descriptor), first, second);
	push_instruction(inst);
	return build_buffer_load_return_composite(op, inst);
}

bool ParseContext::build_buffer_load_cbv(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto addr = ir::SsaDef(op.getOperand(1));
	Instruction *inst = nullptr;

	if (op.getType().isScalarType())
	{
		if (op.getType().byteSize() != 4)
		{
			LOGE("Only support 4 byte scalar CBV loads.\n");
			return false;
		}

		auto *result_type = convert_type(op.getType());
		auto *addr_value = get_value(addr);
		if (!llvm::isa<VectorType>(addr_value->getType()))
		{
			LOGE("Expected a vector type addr for vectors.\n");
			return false;
		}

		auto *index16 = get_extracted_composite_component(addr_value, 0);
		auto *index4 = get_extracted_composite_component(addr_value, 1);
		auto *mul16 = get_constant_mul(index16, 16);
		auto *mul4 = get_constant_mul(index4, 4);

		Value *byte_addr;
		if (isa<ConstantInt>(mul16) && isa<ConstantInt>(mul4))
		{
			byte_addr = get_constant_uint(cast<ConstantInt>(mul16)->getUniqueInteger().getZExtValue() +
			                              cast<ConstantInt>(mul4)->getUniqueInteger().getZExtValue());
		}
		else
		{
			auto *byte_addr_inst = context.construct<llvm::BinaryOperator>(mul16, mul4, llvm::BinaryOperator::BinaryOps::Add);
			push_instruction(byte_addr_inst);
			byte_addr = byte_addr_inst;
		}

		inst = build_dxil_call(DXIL::Op::CBufferLoad, result_type, result_type, get_value(descriptor), byte_addr);
	}
	else if (op.getType().isVectorType())
	{
		if (op.getType().getBaseType(0).getVectorSize() != 4 || op.getType().byteSize() != 16)
		{
			LOGE("We can only support vec4 or scalar loads from CBV.\n");
			return false;
		}

		auto *result_type = convert_type(op.getType());
		auto *addr_value = get_value(addr);
		inst = build_dxil_call(DXIL::Op::CBufferLoadLegacy, result_type, result_type, get_value(descriptor), addr_value);
	}

	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_buffer_load(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	// This function is overloaded, so need to figure out which type of load we should generate.
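	// CBVs go through the legacy 16-byte-row cbuffer load path above, everything else
	// through raw/structured BufferLoad with byte-scaled offsets.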
	if (itr->second.resource_type == DXIL::ResourceType::CBV)
		return build_buffer_load_cbv(op);
	else
		return build_buffer_load(op, itr->second.resource_kind);
}

bool ParseContext::build_image_store(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	auto layer = ir::SsaDef(op.getOperand(1));
	auto coord = ir::SsaDef(op.getOperand(2));
	auto value = ir::SsaDef(op.getOperand(3));

	Value *coords[3] = {};
	Value *values[4] = {};
	unsigned num_coord_components = builder.getOp(coord).getType().getBaseType(0).getVectorSize();
	unsigned num_value_components = builder.getOp(value).getType().getBaseType(0).getVectorSize();
	auto *scalar_type = get_scalar_type(get_value(value)->getType());
	auto *coord_value = get_value(coord);

	for (unsigned c = 0; c < num_coord_components; c++)
		coords[c] = get_extracted_composite_component(coord_value, c);
	for (unsigned c = num_coord_components; c < 3; c++)
		coords[c] = UndefValue::get(Type::getInt32Ty(context));

	switch (itr->second.resource_kind)
	{
	case DXIL::ResourceKind::Texture1DArray:
	case DXIL::ResourceKind::Texture2DArray:
		coords[num_coord_components] = get_value(layer);
		break;

	default:
		break;
	}

	for (unsigned c = 0; c < num_value_components; c++)
		values[c] = get_extracted_composite_component(get_value(value), c);
	for (unsigned c = num_value_components; c < 4; c++)
		values[c] = UndefValue::get(scalar_type);

	unsigned mask = (1u << num_value_components) - 1u;

	auto *inst = build_dxil_call(DXIL::Op::TextureStore, Type::getVoidTy(context), scalar_type,
	                             get_value(descriptor), coords[0], coords[1], coords[2],
	                             values[0], values[1], values[2], values[3], get_constant_uint(mask));
	push_instruction(inst);
	return true;
}

bool ParseContext::build_image_atomic(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	auto layer = ir::SsaDef(op.getOperand(1));
	auto coord = ir::SsaDef(op.getOperand(2));
	auto atomic_op = ir::AtomicOp(op.getOperand(op.getFirstLiteralOperandIndex()));

	Value *coords[3] = {};
	unsigned num_coord_components = builder.getOp(coord).getType().getBaseType(0).getVectorSize();
	auto *int_type = Type::getInt32Ty(context);

	for (unsigned c = 0; c < num_coord_components; c++)
		coords[c] = get_extracted_composite_component(get_value(coord), c);
	for (unsigned c = num_coord_components; c < 3; c++)
		coords[c] = UndefValue::get(Type::getInt32Ty(context));

	switch (itr->second.resource_kind)
	{
	case DXIL::ResourceKind::Texture1DArray:
	case DXIL::ResourceKind::Texture2DArray:
		coords[num_coord_components] = get_value(layer);
		break;

	default:
		break;
	}

	if (atomic_op == ir::AtomicOp::eCompareExchange)
	{
		auto *inst = build_dxil_call(DXIL::Op::AtomicCompareExchange, int_type, int_type,
		                             get_value(descriptor), coords[0], coords[1], coords[2],
		                             get_extracted_composite_component(get_value(op.getOperand(3)), 0),
		                             get_extracted_composite_component(get_value(op.getOperand(3)), 1));
		push_instruction(inst, op.getDef());
		return true;
	}

	auto binop = convert_atomic_binop(atomic_op);
	auto *return_type = convert_type(op.getType());
	Value *value;

	if (binop == DXIL::AtomicBinOp::Load)
	{
		value = UndefValue::get(int_type);
	}
	else if (atomic_op == ir::AtomicOp::eInc || atomic_op == ir::AtomicOp::eDec)
	{
		value = get_constant_uint(1);
	}
	else
	{
		value =
		    get_value(op.getOperand(3));
		if (binop != DXIL::AtomicBinOp::Store && op.getType().isVoidType())
			return_type = int_type;
	}

	auto *inst = build_dxil_call(DXIL::Op::AtomicBinOp, return_type, return_type,
	                             get_value(descriptor), get_constant_uint(uint32_t(binop)),
	                             coords[0], coords[1], coords[2], value);
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_image_load(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;
	auto kind = itr->second.resource_kind;
	auto *int_type = Type::getInt32Ty(context);

	auto mip = ir::SsaDef(op.getOperand(1));
	auto layer = ir::SsaDef(op.getOperand(2));
	auto coord = ir::SsaDef(op.getOperand(3));
	auto sample = ir::SsaDef(op.getOperand(4));
	auto offset = ir::SsaDef(op.getOperand(5));

	Value *mip_or_sample = nullptr;
	Value *offsets[3] = {};
	Value *coords[3] = {};

	if (kind == DXIL::ResourceKind::TextureCube || kind == DXIL::ResourceKind::TextureCubeArray)
	{
		LOGE("Cubes not allowed for loads.\n");
		return false;
	}

	if (kind == DXIL::ResourceKind::Texture2DMS || kind == DXIL::ResourceKind::Texture2DMSArray)
		mip_or_sample = get_value(sample);
	else if (itr->second.resource_type == DXIL::ResourceType::SRV)
		mip_or_sample = get_value(mip);

	unsigned coord_components = builder.getOp(coord).getType().getBaseType(0).getVectorSize();
	for (unsigned c = 0; c < coord_components; c++)
	{
		coords[c] = get_extracted_composite_component(get_value(coord), c);
		if (offset)
			offsets[c] = get_extracted_composite_component(get_value(offset), c);
	}

	if (kind == DXIL::ResourceKind::Texture1DArray ||
	    kind == DXIL::ResourceKind::Texture2DArray ||
	    kind == DXIL::ResourceKind::Texture2DMSArray)
	{
		coords[coord_components] = get_value(layer);
	}

	if (!mip_or_sample)
		mip_or_sample = UndefValue::get(int_type);
	for (auto &off : offsets)
		if (!off)
			off = UndefValue::get(int_type);
	for (auto &c : coords)
		if (!c)
			c = UndefValue::get(int_type);

	auto *result_type = convert_type(op.getType());
	auto *dxil_result_type = get_composite_return_type(result_type);

	auto *inst = build_dxil_call(DXIL::Op::TextureLoad, dxil_result_type, dxil_result_type,
	                             get_value(descriptor), mip_or_sample,
	                             coords[0], coords[1], coords[2],
	                             offsets[0], offsets[1], offsets[2]);
	push_instruction(inst);
	return build_buffer_load_return_composite(op, inst);
}

bool ParseContext::build_image_query_size(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	auto *result_type = convert_type(op.getType());
	auto *dxil_result_type = VectorType::get(4, Type::getInt32Ty(context));
	auto kind = itr->second.resource_kind;

	auto *inst = build_dxil_call(DXIL::Op::GetDimensions, dxil_result_type, dxil_result_type,
	                             get_value(descriptor), op.getOperand(1) ?
	                             get_value(op.getOperand(1)) : UndefValue::get(Type::getInt32Ty(context)));
	push_instruction(inst);

	unsigned num_dimensions = op.getType().getSubType(0).getBaseType(0).getVectorSize();
	auto *dims = build_extract_composite(op, inst, num_dimensions);
	push_instruction(dims);

	Value *layers;
	if (kind == DXIL::ResourceKind::Texture1DArray ||
	    kind == DXIL::ResourceKind::Texture2DArray ||
	    kind == DXIL::ResourceKind::Texture2DMSArray ||
	    kind == DXIL::ResourceKind::TextureCubeArray)
	{
		layers = get_extracted_composite_component(inst, num_dimensions);
	}
	else
	{
		layers = get_constant_uint(1);
	}

	inst = context.construct(result_type, Vector<Value *>{ dims, layers });
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_image_query_mips_samples(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	auto *dxil_result_type = VectorType::get(4, Type::getInt32Ty(context));
	auto *inst = build_dxil_call(DXIL::Op::GetDimensions, dxil_result_type, dxil_result_type,
	                             get_value(descriptor), get_constant_uint(0));
	push_instruction(inst);

	// Mips are encoded in the last structure element, for reasons.
	auto *value = get_extracted_composite_component(inst, 3);
	value_map[op.getDef()] = value;
	return true;
}

bool ParseContext::build_image_sample(const ir::Op &op)
{
	auto image_desc = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(image_desc);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	auto layer = ir::SsaDef(op.getOperand(2));
	auto coord = ir::SsaDef(op.getOperand(3));
	auto offset = ir::SsaDef(op.getOperand(4));
	auto lod_index = ir::SsaDef(op.getOperand(5));
	auto lod_bias = ir::SsaDef(op.getOperand(6));
	auto lod_clamp = ir::SsaDef(op.getOperand(7));
	auto dx = ir::SsaDef(op.getOperand(8));
	auto dy = ir::SsaDef(op.getOperand(9));
	auto dref = ir::SsaDef(op.getOperand(10));

	auto opcode = DXIL::Op::Sample;
	if (lod_index)
		opcode = DXIL::Op::SampleLevel;
	else if (lod_bias)
		opcode = DXIL::Op::SampleBias;
	else if (dx && dy)
		opcode = DXIL::Op::SampleGrad;

	if (op.getType().isScalarType())
	{
		switch (opcode)
		{
		case DXIL::Op::Sample:
			opcode = DXIL::Op::SampleCmp;
			break;

		case DXIL::Op::SampleLevel:
			opcode = DXIL::Op::SampleCmpLevel;
			break;

		case DXIL::Op::SampleBias:
			opcode = DXIL::Op::SampleCmpBias;
			break;

		case DXIL::Op::SampleGrad:
			opcode = DXIL::Op::SampleCmpGrad;
			break;

		default:
			return false;
		}
	}

	unsigned num_coord_components = builder.getOp(coord).getType().getBaseType(0).getVectorSize();
	Value *coords[4] = {};
	Value *offsets[3] = {};
	Value *ddx[3] = {};
	Value *ddy[3] = {};

	for (unsigned c = 0; c < num_coord_components; c++)
	{
		coords[c] = get_extracted_composite_component(get_value(coord), c);
		if (offset)
			offsets[c] = get_extracted_composite_component(get_value(offset), c);
		if (dx)
			ddx[c] = get_extracted_composite_component(get_value(dx), c);
		if (dy)
			ddy[c] = get_extracted_composite_component(get_value(dy), c);
	}

	switch (itr->second.resource_kind)
	{
	case DXIL::ResourceKind::Texture1DArray:
	case DXIL::ResourceKind::Texture2DArray:
	case DXIL::ResourceKind::TextureCubeArray:
		coords[num_coord_components] = get_value(layer);
		break;

	default:
		break;
	}

	Vector<Value *> values;
	values.push_back(get_value(image_desc));
	values.push_back(get_value(op.getOperand(1))); // sampler
	for (auto *c : coords)
		values.push_back(c ?
		                 c : UndefValue::get(Type::getFloatTy(context)));
	for (auto *o : offsets)
		values.push_back(o ? o : UndefValue::get(Type::getInt32Ty(context)));

	if (op.getType().isScalarType())
		values.push_back(get_value(dref));

	if (opcode == DXIL::Op::SampleGrad || opcode == DXIL::Op::SampleCmpGrad)
	{
		for (auto *d : ddx)
			values.push_back(d ? d : UndefValue::get(Type::getFloatTy(context)));
		for (auto *d : ddy)
			values.push_back(d ? d : UndefValue::get(Type::getFloatTy(context)));
	}

	if (opcode == DXIL::Op::SampleBias || opcode == DXIL::Op::SampleCmpBias)
		values.push_back(get_value(lod_bias));

	if (opcode != DXIL::Op::SampleLevel && opcode != DXIL::Op::SampleCmpLevel)
		values.push_back(lod_clamp ? get_value(lod_clamp) : UndefValue::get(Type::getFloatTy(context)));
	else if (lod_index)
		values.push_back(get_value(lod_index));

	auto *result_type = convert_type(op.getType());
	auto *dxil_result_type = get_composite_return_type(result_type);
	auto *inst = build_dxil_call(opcode, dxil_result_type, dxil_result_type, std::move(values));
	push_instruction(inst);
	return build_buffer_load_return_composite(op, inst);
}

bool ParseContext::build_image_gather(const ir::Op &op)
{
	auto image_desc = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(image_desc);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	auto layer = ir::SsaDef(op.getOperand(2));
	auto coord = ir::SsaDef(op.getOperand(3));
	auto offset = ir::SsaDef(op.getOperand(4));
	auto dref = ir::SsaDef(op.getOperand(5));
	auto comp = uint32_t(op.getOperand(6));

	auto opcode = dref ? DXIL::Op::TextureGatherCmp : DXIL::Op::TextureGather;

	unsigned num_coord_components = builder.getOp(coord).getType().getBaseType(0).getVectorSize();
	Value *coords[4] = {};
	Value *offsets[2] = {};

	for (unsigned c = 0; c < num_coord_components; c++)
	{
		coords[c] = get_extracted_composite_component(get_value(coord), c);
		if (offset)
			offsets[c] = get_extracted_composite_component(get_value(offset), c);
	}

	switch (itr->second.resource_kind)
	{
	case DXIL::ResourceKind::Texture2DArray:
	case DXIL::ResourceKind::TextureCubeArray:
		coords[num_coord_components] = get_value(layer);
		break;

	default:
		break;
	}

	Vector<Value *> values;
	values.push_back(get_value(image_desc));
	values.push_back(get_value(op.getOperand(1))); // sampler
	for (auto *c : coords)
		values.push_back(c ? c : UndefValue::get(Type::getFloatTy(context)));
	for (auto *o : offsets)
		values.push_back(o ?
		                 o : UndefValue::get(Type::getInt32Ty(context)));
	values.push_back(get_constant_uint(comp));
	if (dref)
		values.push_back(get_value(dref));

	auto *result_type = convert_type(op.getType());
	auto *dxil_result_type = get_composite_return_type(result_type);
	auto *inst = build_dxil_call(opcode, dxil_result_type, dxil_result_type, std::move(values));
	push_instruction(inst);
	return build_buffer_load_return_composite(op, inst);
}

bool ParseContext::build_image_compute_lod(const ir::Op &op)
{
	auto image_desc = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(image_desc);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	auto coord = ir::SsaDef(op.getOperand(2));
	unsigned num_coord_components = builder.getOp(coord).getType().getBaseType(0).getVectorSize();
	Value *coords[3] = {};

	for (unsigned c = 0; c < num_coord_components; c++)
		coords[c] = get_extracted_composite_component(get_value(coord), c);
	for (unsigned c = num_coord_components; c < 3; c++)
		coords[c] = UndefValue::get(Type::getFloatTy(context));

	// Alternate extended formulation since DXIL is weird.
	auto *inst = build_dxil_call(DXIL::Op::ExtendedCalculateLOD, convert_type(op.getType()), nullptr,
	                             get_value(image_desc), get_value(op.getOperand(1)),
	                             coords[0], coords[1], coords[2]);
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_buffer_store(const ir::Op &op, DXIL::ResourceKind kind)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto *int_type = Type::getInt32Ty(context);
	auto *addr_value = get_value(op.getOperand(1));
	Value *first;
	Value *second;

	// TODO: Adjust byte offset.
	if (kind == DXIL::ResourceKind::StructuredBuffer)
	{
		first = get_extracted_composite_component(addr_value, 0);
		second = get_extracted_composite_component(addr_value, 1);
		second = get_constant_mul(second, 4);
	}
	else
	{
		first = addr_value;
		if (kind == DXIL::ResourceKind::RawBuffer)
			first = get_constant_mul(first, 4);
		second = UndefValue::get(int_type);
	}

	auto *value = get_value(op.getOperand(2));
	Value *scalar_values[4];
	auto *scalar_type = value->getType();
	unsigned num_components = 1;

	if (const auto *vec = dyn_cast<VectorType>(scalar_type))
	{
		scalar_type = vec->getElementType();
		num_components = vec->getVectorSize();
	}

	unsigned mask = (1u << num_components) - 1u;
	for (unsigned c = 0; c < num_components; c++)
		scalar_values[c] = get_extracted_composite_component(value, c);
	for (unsigned c = num_components; c < 4; c++)
		scalar_values[c] = UndefValue::get(scalar_type);

	auto *inst = build_dxil_call(DXIL::Op::BufferStore, Type::getVoidTy(context), scalar_type,
	                             get_value(descriptor), first, second,
	                             scalar_values[0], scalar_values[1], scalar_values[2], scalar_values[3],
	                             get_constant_uint(mask));
	push_instruction(inst);
	return true;
}

bool ParseContext::build_buffer_store(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;
	return build_buffer_store(op, itr->second.resource_kind);
}

bool ParseContext::build_buffer_atomic_binop(const ir::Op &op, DXIL::ResourceKind kind)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto *int_type = Type::getInt32Ty(context);
	auto *addr_value = get_value(op.getOperand(1));
	Value *first;
	Value *second;

	// TODO: Adjust byte offset.
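	// Same addressing scheme as buffer loads/stores: structured buffers take
	// (element index, byte offset into element), raw buffers a plain byte offset,
	// hence the * 4 dword-to-byte scaling below.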
	if (kind == DXIL::ResourceKind::StructuredBuffer)
	{
		first = get_extracted_composite_component(addr_value, 0);
		second = get_extracted_composite_component(addr_value, 1);
		second = get_constant_mul(second, 4);
	}
	else
	{
		first = addr_value;
		if (kind == DXIL::ResourceKind::RawBuffer)
			first = get_constant_mul(first, 4);
		second = UndefValue::get(int_type);
	}

	auto atomic_op = ir::AtomicOp(op.getOperand(op.getFirstLiteralOperandIndex()));
	Value *value;
	auto *return_type = convert_type(op.getType());

	if (atomic_op == ir::AtomicOp::eCompareExchange)
	{
		auto *inst = build_dxil_call(DXIL::Op::AtomicCompareExchange, int_type, int_type,
		                             get_value(descriptor), first, second, UndefValue::get(int_type),
		                             get_extracted_composite_component(get_value(op.getOperand(2)), 0),
		                             get_extracted_composite_component(get_value(op.getOperand(2)), 1));
		push_instruction(inst, op.getDef());
		return true;
	}

	auto binop = convert_atomic_binop(atomic_op);
	if (binop == DXIL::AtomicBinOp::Load)
	{
		value = UndefValue::get(int_type);
	}
	else if (atomic_op == ir::AtomicOp::eInc || atomic_op == ir::AtomicOp::eDec)
	{
		value = get_constant_uint(1);
	}
	else
	{
		value = get_value(op.getOperand(2));
		if (binop != DXIL::AtomicBinOp::Store && op.getType().isVoidType())
			return_type = int_type;
	}

	auto *inst = build_dxil_call(DXIL::Op::AtomicBinOp, return_type, return_type,
	                             get_value(descriptor), get_constant_uint(uint32_t(binop)),
	                             first, second, UndefValue::get(int_type), value);
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_lds_atomic(const ir::Op &op)
{
	auto *lds = get_value(op.getOperand(0));
	Vector<Value *> args;
	args.push_back(lds);
	args.push_back(get_constant_uint(0));

	if (op.getOperand(1))
	{
		auto &addr = builder.getOp(ir::SsaDef(op.getOperand(1)));
		auto *addr_value = get_value(op.getOperand(1));
		for (uint32_t i = 0; i < addr.getType().getBaseType(0).getVectorSize(); i++)
			args.push_back(get_extracted_composite_component(addr_value, i));
	}

	Type *type;
	if (!op.getType().isVoidType())
		type = convert_type(op.getType());
	else
		type = convert_type(builder.getOp(ir::SsaDef(op.getOperand(2))).getType());

	auto *gep = context.construct<GetElementPtrInst>(
		PointerType::get(type, uint32_t(DXIL::AddressSpace::GroupShared)), std::move(args), true);
	push_instruction(gep);

	auto *value = get_value(op.getOperand(2));
	auto atomic_op = ir::AtomicOp(op.getOperand(3));

	if (atomic_op == ir::AtomicOp::eCompareExchange)
	{
		auto *inst = context.construct<AtomicCmpXchgInst>(
			gep,
			get_extracted_composite_component(value, 0),
			get_extracted_composite_component(value, 1), type);
		push_instruction(inst, op.getDef());
	}
	else
	{
		if (atomic_op == ir::AtomicOp::eInc || atomic_op == ir::AtomicOp::eDec)
			value = get_constant_uint(1);
		else if (atomic_op == ir::AtomicOp::eLoad)
			value = get_constant_uint(0);
		assert(value);

		auto *inst = context.construct<AtomicRMWInst>(type, gep, value, convert_atomic_binop_llvm(atomic_op));
		push_instruction(inst, op.getDef());
	}

	return true;
}

bool ParseContext::build_buffer_atomic(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;
	return build_buffer_atomic_binop(op, itr->second.resource_kind);
}

bool ParseContext::build_counter_atomic(const ir::Op &op)
{
	auto &load_desc_op = builder.getOp(ir::SsaDef(op.getOperand(0)));
	auto counter_descriptor = ir::SsaDef(load_desc_op.getOperand(0));
	auto *int_type = Type::getInt32Ty(context);
	auto &counter_resource_op = builder.getOp(counter_descriptor);
	auto descriptor =
	    ir::SsaDef(counter_resource_op.getOperand(1));
	auto itr = resource_map.find(descriptor);
	if (itr == resource_map.end())
		return false;

	auto *inst = build_dxil_call(DXIL::Op::BufferUpdateCounter, int_type, int_type,
	                             get_value(load_desc_op.getDef()),
	                             get_constant_uint(ir::AtomicOp(op.getOperand(1)) == ir::AtomicOp::eInc ? 1 : -1));
	push_instruction(inst, op.getDef());
	return true;
}

bool ParseContext::build_buffer_query_size(const ir::Op &op)
{
	auto descriptor = ir::SsaDef(op.getOperand(0));
	auto &resource_op = builder.getOp(descriptor);
	auto itr = resource_map.find(ir::SsaDef(resource_op.getOperand(0)));
	if (itr == resource_map.end())
		return false;

	auto *result_type = convert_type(op.getType());
	auto *vec4_type = get_vec4_variant(result_type);

	// Fold in the mul + div into a plain OpArrayLength.
	auto *inst = build_dxil_call(DXIL::Op::ExtendedGetDimensions, vec4_type, nullptr,
	                             get_value(descriptor), UndefValue::get(Type::getInt32Ty(context)),
	                             get_constant_uint(itr->second.resource_kind == DXIL::ResourceKind::RawBuffer ? 4 : 1));
	push_instruction(inst);

	auto *value = get_extracted_composite_component(inst, 0);
	value_map[op.getDef()] = value;
	return true;
}

Instruction *ParseContext::build_descriptor_load(ir::SsaDef resource, ir::SsaDef index, bool nonuniform)
{
	auto itr = resource_map.find(resource);
	if (itr == resource_map.end())
		return nullptr;

	// Dummy pointer type which represents handles.
	// It's not directly used.
	auto *ptr_type = PointerType::get(Type::getVoidTy(context), 0);
	auto *bool_type = Type::getInt1Ty(context);

	Value *binding_offset;
	if (index)
	{
		auto *dynamic_offset = get_value(index);
		if (const auto *const_offset = llvm::dyn_cast<ConstantInt>(dynamic_offset))
		{
			binding_offset = get_constant_uint(const_offset->getUniqueInteger().getZExtValue() + itr->second.binding_offset);
		}
		else if (itr->second.binding_offset)
		{
			// SM 5.1 bindless.
			auto *add = context.construct<BinaryOperator>(dynamic_offset, get_constant_uint(itr->second.binding_offset),
			                                              BinaryOperator::BinaryOps::Add);
			push_instruction(add);
			binding_offset = add;
		}
		else
		{
			binding_offset = dynamic_offset;
		}
	}
	else
	{
		// DXIL is a bit silly and takes effective register index instead of offset into binding space.
		binding_offset = get_constant_uint(itr->second.binding_offset);
	}

	return build_dxil_call(DXIL::Op::CreateHandle, ptr_type, nullptr,
	                       get_constant_uint(uint32_t(itr->second.resource_type)),
	                       get_constant_uint(itr->second.index),
	                       binding_offset, ConstantInt::get(bool_type, nonuniform));
}

MDOperand *ParseContext::create_null_meta()
{
	return context.construct<MDOperand>(&module, MetadataKind::None);
}

MDNode *ParseContext::create_md_node(Vector<MDOperand *> ops)
{
	auto *node = context.construct<MDNode>(&module, std::move(ops));
	node->set_tween_id(++metadata_tween_id);
	module.add_unnamed_metadata(node);
	return node;
}

void ParseContext::create_named_md_node(const String &name, MDNode *node)
{
	Vector vops { node };
	auto *n = context.construct<NamedMDNode>(&module, name, std::move(vops));
	module.add_named_metadata(name, n);
}

MDNode *ParseContext::create_stage_io_meta()
{
	struct IOOp
	{
		const ir::Op *op;
		std::string semantic;
		uint32_t index;
	};
	std::vector<IOOp> io_inputs, io_outputs, io_patch;

	for (auto &op : builder)
	{
		switch (op.getOpCode())
		{
		case ir::OpCode::eDclInput:
		case ir::OpCode::eDclInputBuiltIn:
			// For user-IO the general rule is that if it's an array it's a control point of some kind.
			// Multiple rows for stage IO is not used except for certain builtins.
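			// Patch-constant declarations are split off into their own list so they
			// land in the third signature blob rather than the per-vertex
			// input/output signatures (see the (inputs, outputs, patches) node
			// returned at the end of this function).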
			if (io_decl_is_patch(shader_stage, op))
				io_patch.push_back({ &op });
			else
				io_inputs.push_back({ &op });
			break;

		case ir::OpCode::eDclOutput:
		case ir::OpCode::eDclOutputBuiltIn:
			// For user-IO the general rule is that if it's an array it's a control point of some kind.
			// Multiple rows for stage IO is not used except for certain builtins.
			if (io_decl_is_patch(shader_stage, op))
				io_patch.push_back({ &op });
			else
				io_outputs.push_back({ &op });
			break;

		case ir::OpCode::eSemantic:
		{
			std::vector<IOOp> *sems[] = { &io_inputs, &io_outputs, &io_patch };
			for (auto *sem : sems)
			{
				for (auto &ioop : *sem)
				{
					if (ioop.op->getDef() == ir::SsaDef(op.getOperand(0)))
					{
						ioop.index = uint32_t(op.getOperand(1));
						ioop.semantic = op.getLiteralString(2);
					}
				}
			}
			break;
		}

		default:
			break;
		}
	}

	const struct
	{
		std::vector<IOOp> *ioop;
		MetadataMapping *mapping;
	} mappings[] = {
		{ &io_inputs, &inputs },
		{ &io_outputs, &outputs },
		{ &io_patch, &patches },
	};

	for (auto &mapping : mappings)
	{
		for (auto &io : *mapping.ioop)
		{
			DXIL::Semantic builtin = shader_stage == ir::ShaderStage::ePixel && io.op->getOpCode() == ir::OpCode::eDclOutput ?
			                         DXIL::Semantic::Target : DXIL::Semantic::User;
			uint32_t location, component;
			uint32_t stream = UINT32_MAX;
			bool is_input = io.op->getOpCode() == ir::OpCode::eDclInput || io.op->getOpCode() == ir::OpCode::eDclInputBuiltIn;
			bool is_user = io.op->getOpCode() == ir::OpCode::eDclInput || io.op->getOpCode() == ir::OpCode::eDclOutput;

			if (is_user)
			{
				location = uint32_t(io.op->getOperand(1));
				component = uint32_t(io.op->getOperand(2));
				if (!is_input && io.op->getOperandCount() == 4)
					stream = uint32_t(io.op->getOperand(3));
			}
			else
			{
				builtin = convert_semantic(ir::BuiltIn(io.op->getOperand(1)));
				location = UINT32_MAX;
				component = UINT32_MAX;
				if (!is_input && io.op->getOperandCount() == 3)
					stream = uint32_t(io.op->getOperand(2));

				if (builtin == DXIL::Semantic::Depth)
				{
					for_all_opcodes(builder, ir::OpCode::eSetPsDepthLessEqual, [&](const ir::Op &op) {
						builtin = DXIL::Semantic::DepthLessEqual;
						return false;
					});
					for_all_opcodes(builder, ir::OpCode::eSetPsDepthGreaterEqual, [&](const ir::Op &op) {
						builtin = DXIL::Semantic::DepthGreaterEqual;
						return false;
					});
				}

				if (io.op->getOpCode() == ir::OpCode::eDclInputBuiltIn)
				{
					// Some stage IO builtins are resolved through opcodes, not IO.
					auto op = convert_builtin_opcode(ir::BuiltIn(io.op->getOperand(1)));
					if (builtin == DXIL::Semantic::Coverage)
						op = DXIL::Op::Coverage;
					if (op != DXIL::Op::Count)
					{
						stage_io_map[io.op->getDef()] = { UINT32_MAX, op, false };
						continue;
					}
				}
			}

			auto interpolation = DXIL::InterpolationMode::Invalid;
			if (is_input)
				interpolation = convert_interpolation_mode(ir::InterpolationMode(io.op->getOperand(is_user ? 3 : 2)));

			bool is_geom = shader_stage == ir::ShaderStage::eGeometry;
			bool is_tess = shader_stage == ir::ShaderStage::eHull || shader_stage == ir::ShaderStage::eDomain;
			bool is_geom_tess_input = is_input && (is_geom || is_tess);
			bool is_hull_output = !is_input && shader_stage == ir::ShaderStage::eHull;

			// TessFactors is the exception since it's a patch array.
			bool need_axis = io.op->getType().isArrayType() &&
			                 (builtin != DXIL::Semantic::TessFactor && builtin != DXIL::Semantic::InsideTessFactor) &&
			                 (is_geom_tess_input || is_hull_output);

			auto comp = convert_component_mapping(io.op->getType(), need_axis);
			build_stage_io(*mapping.mapping, io.op->getDef(), String(io.semantic),
			               comp.type, builtin, io.index, interpolation,
			               comp.num_rows, comp.num_cols, location, component, stream, need_axis);
		}
	}

	return create_md_node(inputs.nodes.empty() ?
	                      create_null_meta() : create_md_node(inputs.nodes),
	                      outputs.nodes.empty() ? create_null_meta() : create_md_node(outputs.nodes),
	                      patches.nodes.empty() ? create_null_meta() : create_md_node(patches.nodes));
}

MDOperand *ParseContext::create_entry_point_meta(Function *patch_control_func)
{
	Vector<MDOperand *> flag_ops;
	uint64_t shader_flags = 0;

	flag_ops.push_back(create_constant_uint_meta(uint32_t(DXIL::ShaderPropertyTag::ShaderFlags)));
	if (shader_stage == ir::ShaderStage::ePixel)
	{
		for_all_opcodes(builder, ir::OpCode::eSetPsEarlyFragmentTest, [&](const ir::Op &) {
			shader_flags |= DXIL::ShaderFlagEarlyDepthStencil;
			return false;
		});
	}
	flag_ops.push_back(create_constant_uint64_meta(shader_flags));

	if (shader_stage == ir::ShaderStage::eCompute)
	{
		flag_ops.push_back(create_constant_uint_meta(uint32_t(DXIL::ShaderPropertyTag::NumThreads)));
		const ir::Op *threads = nullptr;
		for_all_opcodes(builder, ir::OpCode::eSetCsWorkgroupSize, [&](const ir::Op &op) {
			threads = &op;
			return false;
		});

		if (!threads)
		{
			LOGE("Need to declare threads.\n");
			return nullptr;
		}

		flag_ops.push_back(create_md_node(create_constant_uint_meta(uint32_t(threads->getOperand(1))),
		                                  create_constant_uint_meta(uint32_t(threads->getOperand(2))),
		                                  create_constant_uint_meta(uint32_t(threads->getOperand(3)))));
	}
	else if (shader_stage == ir::ShaderStage::eGeometry)
	{
		flag_ops.push_back(create_constant_uint_meta(uint32_t(DXIL::ShaderPropertyTag::GSState)));

		ir::PrimitiveType input_primitive = {};
		ir::PrimitiveType output_primitive = {};
		uint32_t stream_mask = 0;
		uint32_t instances = 0;
		uint32_t output_vertices = 0;

		for (auto &op : builder)
		{
			switch (op.getOpCode())
			{
			case ir::OpCode::eSetGsInstances:
				instances = uint32_t(op.getOperand(1));
				break;

			case ir::OpCode::eSetGsOutputVertices:
				output_vertices = uint32_t(op.getOperand(1));
				break;

			case ir::OpCode::eSetGsInputPrimitive:
				input_primitive = ir::PrimitiveType(op.getOperand(1));
				break;

			case ir::OpCode::eSetGsOutputPrimitive:
				output_primitive = ir::PrimitiveType(op.getOperand(1));
				stream_mask = uint32_t(op.getOperand(2));
				break;

			default:
				break;
			}
		}

		flag_ops.push_back(create_md_node(
			create_constant_uint_meta(uint32_t(convert_input_primitive_type(input_primitive))),
			create_constant_uint_meta(output_vertices),
			create_constant_uint_meta(stream_mask),
			create_constant_uint_meta(uint32_t(convert_output_primitive_type(output_primitive))),
			create_constant_uint_meta(instances)));
	}
	else if (shader_stage == ir::ShaderStage::eHull)
	{
		ir::PrimitiveType prim = {};
		ir::PrimitiveType domain = {};
		ir::TessWindingOrder winding = {};
		ir::TessPartitioning partitioning = {};
		uint32_t input_control_points = 0;
		uint32_t output_control_points = 0;

		for (auto &op : builder)
		{
			switch (op.getOpCode())
			{
			case ir::OpCode::eSetTessControlPoints:
				input_control_points = uint32_t(op.getOperand(1));
				output_control_points = uint32_t(op.getOperand(2));
				break;

			case ir::OpCode::eSetTessPrimitive:
				prim = ir::PrimitiveType(op.getOperand(1));
				winding = ir::TessWindingOrder(op.getOperand(2));
				partitioning = ir::TessPartitioning(op.getOperand(3));
				break;

			case ir::OpCode::eSetTessDomain:
				domain = ir::PrimitiveType(op.getOperand(1));
				break;

			default:
				break;
			}
		}

		flag_ops.push_back(create_constant_uint_meta(uint32_t(DXIL::ShaderPropertyTag::HSState)));
		flag_ops.push_back(create_md_node(patch_control_func ?
		                                  create_constant_meta(patch_control_func) : create_null_meta(),
		                                  create_constant_uint_meta(input_control_points),
		                                  create_constant_uint_meta(output_control_points),
		                                  create_constant_uint_meta(uint32_t(convert_hull_domain(domain))),
		                                  create_constant_uint_meta(uint32_t(convert_hull_partitioning(partitioning))),
		                                  create_constant_uint_meta(uint32_t(convert_hull_output_primitive(prim, winding)))));
	}
	else if (shader_stage == ir::ShaderStage::eDomain)
	{
		ir::PrimitiveType domain = {};
		for_all_opcodes(builder, ir::OpCode::eSetTessDomain, [&](const ir::Op &op) {
			domain = ir::PrimitiveType(op.getOperand(1));
			return false;
		});

		flag_ops.push_back(create_constant_uint_meta(uint32_t(DXIL::ShaderPropertyTag::DSState)));
		flag_ops.push_back(create_md_node(create_constant_uint_meta(uint32_t(convert_hull_domain(domain))),
		                                  create_constant_uint_meta(32 /* somewhat irrelevant? */)));
	}

	return flag_ops.empty() ? create_null_meta() : create_md_node(std::move(flag_ops));
}

void ParseContext::set_function_attributes(Function *func)
{
	Vector<std::pair<String, String>> attrs;

	for_all_opcodes(builder, ir::OpCode::eSetFpMode, [&](const ir::Op &op) {
		auto round = ir::RoundMode(op.getOperand(1));
		auto denorm = ir::DenormMode(op.getOperand(2));
		const char *round_mode = nullptr;
		const char *denorm_mode = nullptr;

		switch (op.getType().getBaseType(0).getBaseType())
		{
		case ir::ScalarType::eF16:
			round_mode = "dxbc-fp16-round-mode";
			denorm_mode = "dxbc-fp16-denorm-mode";
			break;

		case ir::ScalarType::eF32:
			round_mode = "dxbc-fp32-round-mode";
			denorm_mode = "dxbc-fp32-denorm-mode";
			break;

		case ir::ScalarType::eF64:
			round_mode = "dxbc-fp64-round-mode";
			denorm_mode = "dxbc-fp64-denorm-mode";
			break;

		default:
			break;
		}

		if (round == ir::RoundMode::eZero)
			attrs.emplace_back(round_mode, "rtz");
		else if (round == ir::RoundMode::eNearestEven)
			attrs.emplace_back(round_mode, "rte");

		if (denorm == ir::DenormMode::eFlush)
			attrs.emplace_back(denorm_mode, "ftz");
		else if (denorm == ir::DenormMode::ePreserve)
			attrs.emplace_back(denorm_mode, "preserve");

		global_fp_flags |= op.getFlags();
		return true;
	});

	func->set_attributes(std::move(attrs));
}

bool ParseContext::emit_entry_point()
{
	const ir::Op *entry = nullptr;
	for_all_opcodes(builder, ir::OpCode::eEntryPoint, [&](const ir::Op &op) {
		entry = &op;
		return false;
	});

	if (!entry)
		return false;

	shader_stage = ir::ShaderStage(entry->getOperand(entry->getFirstLiteralOperandIndex()));
	Function *patch_control_func = nullptr;

	// Process patch constant func first so we can emit metadata.
	for (uint32_t i_plus1 = entry->getFirstLiteralOperandIndex(); i_plus1; i_plus1--)
	{
		auto i = i_plus1 - 1;
		auto ssa = ir::SsaDef(entry->getOperand(i));
		Type *type = convert_type(entry->getType());

		// Entry points don't take arguments.
		auto *func_type = context.construct<FunctionType>(context, type, Vector<Type *>{});
		auto *func = context.construct<Function>(func_type, ++tween_id, module);
		module.add_value_name(tween_id, i == 0 ? "main" : "patchMain");
		if (i == 1)
			patch_control_func = func;

		// We're not barbarians.
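		// (Presumably this is what carries the selection/loop merge info recorded via
		// set_selection_merge()/set_loop_merge() in emit_function_bodies() through to the
		// consumer, so the CFG does not have to be re-structurized from scratch.)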
		func->set_structured_control_flow();
		function_map[ssa] = func;

		if (i == 0)
		{
			create_named_md_node("dx.entryPoints",
			                     create_md_node(create_constant_meta(func), create_string_meta("main"),
			                                    create_stage_io_meta(), create_null_meta(),
			                                    create_entry_point_meta(patch_control_func)));
			set_function_attributes(func);
		}
	}

	auto *name = create_string_meta("dxbc-spirv");
	create_named_md_node("llvm.ident", create_md_node(name));

	const char *stage_str = shader_stage_to_meta(shader_stage);
	auto *stage_type = create_string_meta(stage_str);
	auto *major = create_constant_uint_meta(6);
	auto *minor = create_constant_uint_meta(0);
	create_named_md_node("dx.shaderModel", create_md_node(stage_type, major, minor));
	return true;
}

uint32_t ParseContext::build_texture_srv(uint32_t space, uint32_t index, uint32_t size,
                                         DXIL::ResourceKind kind, DXIL::ComponentType type)
{
	uint32_t ret = srvs.nodes.size();
	auto *srv = create_md_node(
		create_constant_uint_meta(ret),
		create_null_meta(),
		create_string_meta(""),
		create_constant_uint_meta(space),
		create_constant_uint_meta(index),
		create_constant_uint_meta(size),
		create_constant_uint_meta(uint32_t(kind)),
		create_null_meta(), // SRV sample count? We don't care about that.
		create_md_node(create_constant_uint_meta(0), create_constant_uint_meta(uint32_t(type))));
	srvs.nodes.push_back(srv);
	return ret;
}

uint32_t ParseContext::build_texture_uav(uint32_t space, uint32_t index, uint32_t size,
                                         DXIL::ResourceKind kind, DXIL::ComponentType type,
                                         bool coherent, bool rov)
{
	uint32_t ret = uavs.nodes.size();
	auto *uav = create_md_node(
		create_constant_uint_meta(ret),
		create_null_meta(),
		create_string_meta(""),
		create_constant_uint_meta(space),
		create_constant_uint_meta(index),
		create_constant_uint_meta(size),
		create_constant_uint_meta(uint32_t(kind)),
		create_constant_uint_meta(coherent),
		create_constant_uint_meta(false),
		create_constant_uint_meta(rov),
		create_md_node(create_constant_uint_meta(0), create_constant_uint_meta(uint32_t(type))));
	uavs.nodes.push_back(uav);
	return ret;
}

uint32_t ParseContext::build_buffer_uav(uint32_t space, uint32_t index, uint32_t size,
                                        DXIL::ResourceKind kind, uint32_t stride,
                                        bool coherent, bool counter, bool rov)
{
	uint32_t ret = uavs.nodes.size();
	auto *uav = create_md_node(
		create_constant_uint_meta(ret),
		create_null_meta(),
		create_string_meta(""),
		create_constant_uint_meta(space),
		create_constant_uint_meta(index),
		create_constant_uint_meta(size),
		create_constant_uint_meta(uint32_t(kind)),
		create_constant_uint_meta(coherent),
		create_constant_uint_meta(counter),
		create_constant_uint_meta(rov),
		create_md_node(create_constant_uint_meta(1), create_constant_uint_meta(stride)));
	uavs.nodes.push_back(uav);
	return ret;
}

uint32_t ParseContext::build_buffer_srv(uint32_t space, uint32_t index, uint32_t size,
                                        DXIL::ResourceKind kind, uint32_t stride)
{
	uint32_t ret = srvs.nodes.size();
	auto *srv = create_md_node(
		create_constant_uint_meta(ret),
		create_null_meta(),
		create_string_meta(""),
		create_constant_uint_meta(space),
		create_constant_uint_meta(index),
		create_constant_uint_meta(size),
		create_constant_uint_meta(uint32_t(kind)),
		create_null_meta(), // SRV sample count? We don't care about that.
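		// Trailing tag/value pair: tag 1 carries the structured element stride here,
		// while typed resources above use tag 0 with the component type instead.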
		create_md_node(create_constant_uint_meta(1), create_constant_uint_meta(stride)));
	srvs.nodes.push_back(srv);
	return ret;
}

uint32_t ParseContext::build_sampler(uint32_t space, uint32_t index, uint32_t size)
{
	uint32_t ret = samplers.nodes.size();
	auto *sampler = create_md_node(
		create_constant_uint_meta(ret),
		create_null_meta(),
		create_string_meta(""),
		create_constant_uint_meta(space),
		create_constant_uint_meta(index),
		create_constant_uint_meta(size));
	samplers.nodes.push_back(sampler);
	return ret;
}

uint32_t ParseContext::build_cbv(uint32_t space, uint32_t index, uint32_t size, uint32_t cbv_size)
{
	uint32_t ret = cbvs.nodes.size();
	auto *cbv = create_md_node(
		create_constant_uint_meta(ret),
		create_null_meta(),
		create_string_meta(""),
		create_constant_uint_meta(space),
		create_constant_uint_meta(index),
		create_constant_uint_meta(size),
		create_constant_uint_meta(cbv_size));
	cbvs.nodes.push_back(cbv);
	return ret;
}

uint32_t ParseContext::build_stage_io(MetadataMapping &mapping, ir::SsaDef ssa, const String &name,
                                      DXIL::ComponentType type, DXIL::Semantic semantic, uint32_t semantic_index,
                                      DXIL::InterpolationMode interpolation, uint32_t rows, uint32_t cols,
                                      uint32_t start_row, uint32_t start_col, uint32_t stream, bool need_axis)
{
	uint32_t ret = mapping.nodes.size();
	stage_io_map[ssa] = { ret, DXIL::Op::Count, need_axis };

	MDOperand *stream_meta;
	if (stream != UINT32_MAX)
	{
		stream_meta = create_md_node(create_constant_uint_meta(uint32_t(DXIL::GSStageOutTags::Stream)),
		                             create_constant_uint_meta(stream));
	}
	else
		stream_meta = create_null_meta();

	auto *input = create_md_node(
		create_constant_uint_meta(ret),
		create_string_meta(name),
		create_constant_uint_meta(uint32_t(type)),
		create_constant_uint_meta(uint32_t(semantic)),
		semantic_index ? create_md_node(create_constant_uint_meta(semantic_index)) : create_null_meta(),
		create_constant_uint_meta(uint32_t(interpolation)),
		create_constant_uint_meta(rows),
		create_constant_uint_meta(cols),
		create_constant_uint_meta(start_row),
		create_constant_uint_meta(start_col),
		stream_meta);
	mapping.nodes.push_back(input);
	return ret;
}

bool ParseContext::emit_metadata()
{
	UnorderedSet<ir::SsaDef> uav_counters;
	for (auto &op : builder)
		if (op.getOpCode() == ir::OpCode::eDclUavCounter)
			uav_counters.insert(ir::SsaDef(op.getOperand(1)));

	for (auto &op : builder)
	{
		switch (op.getOpCode())
		{
		case ir::OpCode::eDclCbv:
		{
			uint32_t space = uint32_t(op.getOperand(1));
			uint32_t binding = uint32_t(op.getOperand(2));
			uint32_t count = uint32_t(op.getOperand(3));
			if (!count)
				count = UINT32_MAX;
			uint32_t cbv_size = op.getType().byteSize();
			uint32_t index = build_cbv(space, binding, count, cbv_size);
			resource_map[op.getDef()] = { DXIL::ResourceType::CBV, DXIL::ResourceKind::CBuffer, index, binding };
			break;
		}

		case ir::OpCode::eDclSampler:
		{
			uint32_t space = uint32_t(op.getOperand(1));
			uint32_t binding = uint32_t(op.getOperand(2));
			uint32_t count = uint32_t(op.getOperand(3));
			if (!count)
				count = UINT32_MAX;
			uint32_t index = build_sampler(space, binding, count);
			resource_map[op.getDef()] = { DXIL::ResourceType::Sampler, DXIL::ResourceKind::Sampler, index, binding };
			break;
		}

		case ir::OpCode::eDclSrv:
		case ir::OpCode::eDclUav:
		{
			uint32_t space = uint32_t(op.getOperand(1));
			uint32_t binding = uint32_t(op.getOperand(2));
			uint32_t count = uint32_t(op.getOperand(3));
			if (!count)
				count = UINT32_MAX;
			auto kind = convert_resource_kind(ir::ResourceKind(uint32_t(op.getOperand(4))));
			bool srv = op.getOpCode() == ir::OpCode::eDclSrv;
			uint32_t index;

			ir::UavFlag uav_flags = {};
			if (!srv)
				uav_flags =
				    ir::UavFlag(op.getOperand(5));

			if (kind == DXIL::ResourceKind::RawBuffer || kind == DXIL::ResourceKind::StructuredBuffer)
			{
				uint32_t stride = 0;
				if (kind == DXIL::ResourceKind::StructuredBuffer)
				{
					if (op.getType().getArrayDimensions() != 2)
					{
						LOGE("Expected 2 array dimensions.\n");
						return false;
					}
					stride = op.getType().getArraySize(0) * 4;
				}
				else
				{
					if (op.getType().getArrayDimensions() != 1)
					{
						LOGE("Expected 1 array dimension.\n");
						return false;
					}
				}

				if (op.getType().getArraySize(op.getType().getArrayDimensions() - 1) != 0)
				{
					LOGE("Last dimension must be unsized.\n");
					return false;
				}

				if (op.getType().getBaseType(0).byteSize() != 4)
				{
					LOGE("Expected 4 byte base type for raw buffers.\n");
					return false;
				}

				if (srv)
				{
					index = build_buffer_srv(space, binding, count, kind, stride);
				}
				else
				{
					index = build_buffer_uav(space, binding, count, kind, stride,
					                         bool(uav_flags & ir::UavFlag::eCoherent),
					                         uav_counters.count(op.getDef()) != 0,
					                         bool(uav_flags & ir::UavFlag::eRasterizerOrdered));
				}
			}
			else
			{
				auto mapping = convert_component_mapping(op.getType(), false);
				if (srv)
				{
					index = build_texture_srv(space, binding, count, kind, mapping.type);
				}
				else
				{
					index = build_texture_uav(space, binding, count, kind, mapping.type,
					                          bool(uav_flags & ir::UavFlag::eCoherent),
					                          bool(uav_flags & ir::UavFlag::eRasterizerOrdered));
				}
			}

			resource_map[op.getDef()] = { srv ? DXIL::ResourceType::SRV : DXIL::ResourceType::UAV, kind, index, binding };
			break;
		}

		default:
			break;
		}
	}

	create_named_md_node("dx.resources",
	                     create_md_node(srvs.nodes.empty() ? create_null_meta() : create_md_node(srvs.nodes),
	                                    uavs.nodes.empty() ? create_null_meta() : create_md_node(uavs.nodes),
	                                    cbvs.nodes.empty() ? create_null_meta() : create_md_node(cbvs.nodes),
	                                    samplers.nodes.empty() ? create_null_meta() : create_md_node(samplers.nodes)));
	return true;
}

bool ParseContext::emit_function_bodies()
{
	Vector<BasicBlock *> bbs;
	Function *func = nullptr;

	for (auto &op : builder)
	{
		switch (op.getOpCode())
		{
		case ir::OpCode::eEntryPoint:
		case ir::OpCode::eDebugName:
		case ir::OpCode::eDebugMemberName:
			break;

		case ir::OpCode::eDclSpecConstant:
		case ir::OpCode::eDclPushData:
		case ir::OpCode::eDclTmp:
		case ir::OpCode::eScopedIf:
		case ir::OpCode::eScopedElse:
		case ir::OpCode::eScopedEndIf:
		case ir::OpCode::eScopedLoop:
		case ir::OpCode::eScopedLoopBreak:
		case ir::OpCode::eScopedLoopContinue:
		case ir::OpCode::eScopedEndLoop:
		case ir::OpCode::eScopedSwitch:
		case ir::OpCode::eScopedSwitchCase:
		case ir::OpCode::eScopedSwitchDefault:
		case ir::OpCode::eScopedSwitchBreak:
		case ir::OpCode::eScopedEndSwitch:
		case ir::OpCode::eConsumeAs:
		case ir::OpCode::eTmpLoad:
		case ir::OpCode::eTmpStore:
		case ir::OpCode::ePushDataLoad:
		case ir::OpCode::eMemoryLoad:
		case ir::OpCode::eMemoryStore:
		case ir::OpCode::eMemoryAtomic:
		case ir::OpCode::ePointer:
		case ir::OpCode::eFMulLegacy:
		case ir::OpCode::eFMadLegacy:
		case ir::OpCode::eFDot:
		case ir::OpCode::eFDotLegacy:
		case ir::OpCode::eFPowLegacy:
		case ir::OpCode::eUMSad:
		case ir::OpCode::eDrain:
			LOGE("Opcode %u should not appear in final IR at this point.\n", unsigned(op.getOpCode()));
			return false;

		case ir::OpCode::eDclXfb:
		case ir::OpCode::eRovScopedLockBegin:
		case ir::OpCode::eRovScopedLockEnd:
			// Should not appear, but we can just ignore it since it has no semantic impact at this stage.
			// ROV is done automatically by dxil-spirv path already, so ignore that here.
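			// (The ROV requirement is presumably rediscovered from the rov flag carried on
			// the UAV declaration metadata via build_texture_uav()/build_buffer_uav(), so
			// dropping the explicit lock markers loses nothing.)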
bool ParseContext::emit_function_bodies()
{
	Vector<BasicBlock *> bbs;
	Function *func = nullptr;

	for (auto &op : builder)
	{
		switch (op.getOpCode())
		{
		case ir::OpCode::eEntryPoint:
		case ir::OpCode::eDebugName:
		case ir::OpCode::eDebugMemberName:
			break;

		case ir::OpCode::eDclSpecConstant:
		case ir::OpCode::eDclPushData:
		case ir::OpCode::eDclTmp:
		case ir::OpCode::eScopedIf:
		case ir::OpCode::eScopedElse:
		case ir::OpCode::eScopedEndIf:
		case ir::OpCode::eScopedLoop:
		case ir::OpCode::eScopedLoopBreak:
		case ir::OpCode::eScopedLoopContinue:
		case ir::OpCode::eScopedEndLoop:
		case ir::OpCode::eScopedSwitch:
		case ir::OpCode::eScopedSwitchCase:
		case ir::OpCode::eScopedSwitchDefault:
		case ir::OpCode::eScopedSwitchBreak:
		case ir::OpCode::eScopedEndSwitch:
		case ir::OpCode::eConsumeAs:
		case ir::OpCode::eTmpLoad:
		case ir::OpCode::eTmpStore:
		case ir::OpCode::ePushDataLoad:
		case ir::OpCode::eMemoryLoad:
		case ir::OpCode::eMemoryStore:
		case ir::OpCode::eMemoryAtomic:
		case ir::OpCode::ePointer:
		case ir::OpCode::eFMulLegacy:
		case ir::OpCode::eFMadLegacy:
		case ir::OpCode::eFDot:
		case ir::OpCode::eFDotLegacy:
		case ir::OpCode::eFPowLegacy:
		case ir::OpCode::eUMSad:
		case ir::OpCode::eDrain:
			LOGE("Opcode %u should not appear in final IR at this point.\n", unsigned(op.getOpCode()));
			return false;

		case ir::OpCode::eDclXfb:
		case ir::OpCode::eRovScopedLockBegin:
		case ir::OpCode::eRovScopedLockEnd:
			// Should not appear, but we can just ignore it since it has no semantic impact at this stage.
			// ROV is done automatically by dxil-spirv path already, so ignore that here.
			break;

		case ir::OpCode::eDclInput:
		case ir::OpCode::eDclInputBuiltIn:
		case ir::OpCode::eDclOutput:
		case ir::OpCode::eDclOutputBuiltIn:
		case ir::OpCode::eDclSrv:
		case ir::OpCode::eDclUav:
		case ir::OpCode::eDclUavCounter:
		case ir::OpCode::eDclCbv:
		case ir::OpCode::eDclSampler:
		case ir::OpCode::eSemantic:
		case ir::OpCode::eSetCsWorkgroupSize:
		case ir::OpCode::eSetPsDepthGreaterEqual:
		case ir::OpCode::eSetPsDepthLessEqual:
		case ir::OpCode::eSetGsInputPrimitive:
		case ir::OpCode::eSetGsOutputPrimitive:
		case ir::OpCode::eSetGsOutputVertices:
		case ir::OpCode::eSetGsInstances:
		case ir::OpCode::eSetTessControlPoints:
		case ir::OpCode::eSetTessDomain:
		case ir::OpCode::eSetTessPrimitive:
		case ir::OpCode::eSetFpMode:
		case ir::OpCode::eSetPsEarlyFragmentTest:
			break;

		case ir::OpCode::eConstant:
			if (!emit_constant(op))
				return false;
			break;

		case ir::OpCode::eUndef:
			value_map[op.getDef()] = UndefValue::get(convert_type(op.getType()));
			break;

		// Functions
		case ir::OpCode::eDclParam:
			param_types[op.getDef()] = convert_type(op.getType());
			break;

		case ir::OpCode::eFunction:
		{
			auto itr = function_map.find(op.getDef());
			if (itr == function_map.end())
			{
				Type *type = convert_type(op.getType());
				Vector<Type *> types;
				types.reserve(op.getOperandCount());
				params.clear();
				for (unsigned i = 0; i < op.getOperandCount(); i++)
				{
					auto *param_type = param_types[ir::SsaDef(op.getOperand(i))];
					types.push_back(param_type);
					params.emplace_back(ir::SsaDef(op.getOperand(i)), param_type);
				}

				auto *func_type = context.construct<FunctionType>(context, type, types);
				func = context.construct<Function>(func_type, ++tween_id, module);
				for (unsigned i = 0; i < op.getOperandCount(); i++)
					func->add_argument(context.construct<Argument>(types[i], i));
				func->set_structured_control_flow();
				function_map[op.getDef()] = func;
			}
			else
			{
				func = itr->second;
			}
			break;
		}

		case ir::OpCode::eFunctionEnd:
			if (!func)
			{
				LOGE("Cannot end function without a function.\n");
				return false;
			}
			func->set_basic_blocks(std::move(bbs));
			module.add_function_implementation(func);
			bbs = {};
			break;

		case ir::OpCode::eParamLoad:
		{
			if (!func)
			{
				LOGE("Cannot get parameter without a function.\n");
				return false;
			}
			auto &func_op = builder.getOp(ir::SsaDef(op.getOperand(0)));
			auto param = ir::SsaDef(op.getOperand(1));
			auto arg_iter = func->arg_begin();
			for (uint32_t i = 0; i < func_op.getOperandCount(); i++, ++arg_iter)
				if (ir::SsaDef(func_op.getOperand(i)) == param)
					break;
			if (arg_iter == func->arg_end())
				return false;
			auto &arg = *arg_iter;
			value_map[op.getDef()] = const_cast<Argument *>(&arg);
			break;
		}

		// Basic Blocks
		case ir::OpCode::eLabel:
		{
			auto *bb = get_basic_block(op.getDef());
			current_bb = bb;
			bbs.push_back(bb);

			switch (ir::Construct(op.getOperand(op.getFirstLiteralOperandIndex())))
			{
			case ir::Construct::eStructuredSelection:
				bb->set_selection_merge(get_basic_block(ir::SsaDef(op.getOperand(0))));
				break;

			case ir::Construct::eStructuredLoop:
				bb->set_loop_merge(get_basic_block(ir::SsaDef(op.getOperand(0))),
				                   get_basic_block(ir::SsaDef(op.getOperand(1))));
				break;

			default:
				break;
			}
			break;
		}
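		// Editor's note: the eLabel case above is where dxbc-spirv's structured
		// control flow survives into this IR. Assuming the operand order used above:
		//   eStructuredSelection: operand 0 = merge block    -> set_selection_merge()
		//   eStructuredLoop:      operand 0 = merge block,
		//                         operand 1 = continue block -> set_loop_merge()
		// This matches the merge/continue shape SPIR-V expects from
		// OpSelectionMerge / OpLoopMerge.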
		case ir::OpCode::ePhi:
		{
			// We might not have emitted all inputs yet. Defer that to a fixup pass later.
			auto *phi = context.construct<PHINode>(convert_type(op.getType()), op.getOperandCount() / 2);
			push_instruction(phi, op.getDef());
			break;
		}

		case ir::OpCode::eReturn:
			if (!current_bb)
				return false;
			if (op.getOperand(0))
				push_instruction(context.construct<ReturnInst>(get_value(op.getOperand(0))));
			else
				push_instruction(context.construct<ReturnInst>(nullptr));
			current_bb = nullptr;
			break;

		case ir::OpCode::eBranch:
		{
			if (!current_bb)
				return false;
			auto *target = get_basic_block(ir::SsaDef(op.getOperand(0)));
			current_bb->add_successor(target);
			push_instruction(context.construct<BranchInst>(target));
			current_bb = nullptr;
			break;
		}

		case ir::OpCode::eBranchConditional:
		{
			if (!current_bb)
				return false;
			auto *value = get_value(op.getOperand(0));
			auto *true_path = get_basic_block(ir::SsaDef(op.getOperand(1)));
			auto *false_path = get_basic_block(ir::SsaDef(op.getOperand(2)));
			current_bb->add_successor(true_path);
			current_bb->add_successor(false_path);
			push_instruction(context.construct<BranchInst>(true_path, false_path, value));
			current_bb = nullptr;
			break;
		}

		case ir::OpCode::eSwitch:
		{
			if (!current_bb)
				return false;
			auto *default_block = get_basic_block(ir::SsaDef(op.getOperand(1)));
			current_bb->add_successor(default_block);
			unsigned num_cases = (op.getOperandCount() - 2) / 2;
			auto *inst = context.construct<SwitchInst>(get_value(op.getOperand(0)), default_block, num_cases);
			for (unsigned i = 0; i < num_cases; i++)
			{
				auto *value = get_value(op.getOperand(2 * i + 2));
				auto *case_label = get_basic_block(ir::SsaDef(op.getOperand(2 * i + 3)));
				current_bb->add_successor(case_label);
				inst->addCase(value, case_label);
			}
			push_instruction(inst);
			current_bb = nullptr;
			break;
		}

		case ir::OpCode::eUnreachable:
			if (!current_bb)
				return false;
			push_instruction(context.construct<UnreachableInst>());
			current_bb = nullptr;
			break;

		case ir::OpCode::eDclScratch:
		case ir::OpCode::eDclLds:
		{
			auto *type = convert_type(op.getType());
			auto *value = context.construct<GlobalVariable>(
				PointerType::get(type, uint32_t(
					op.getOpCode() == ir::OpCode::eDclLds ?
					DXIL::AddressSpace::GroupShared : DXIL::AddressSpace::Thread)),
				GlobalVariable::LinkageTypes::InternalLinkage, false);
			value_map[op.getDef()] = value;
			module.add_global_variable(value);
			break;
		}

		// Opcodes
		default:
			if (!current_bb)
			{
				LOGE("No BB to insert instructions into.\n");
				return false;
			}
			if (!push_instruction(op))
				return false;
			break;
		}
	}

	// Resolve PHI incoming values since we have value-defs for them now.
	for (auto &op : builder)
	{
		if (op.getOpCode() == ir::OpCode::ePhi)
		{
			auto *phi = cast<PHINode>(get_value(op.getDef()));
			for (uint32_t i = 0; i < op.getOperandCount(); i += 2)
				phi->add_incoming(get_value(op.getOperand(i + 1)), get_basic_block(ir::SsaDef(op.getOperand(i))));
		}
	}

	return true;
}
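// Editor's note: PHI resolution above is a two-pass scheme because an incoming
// value may be defined by an op that appears later in the instruction stream.
// A minimal illustration of the forward reference being resolved:
//   %A: phi { (%B, %x), (%C, %y) }   ; %y is not in value_map yet
//   %C: %y = ...                     ; defined after the phi
// The main loop only allocates the PHINode with the right operand count;
// the fixup loop then calls add_incoming() once value_map holds a Value for
// every SSA def.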
Value *ParseContext::get_value(const ir::Operand &op) const
{
	return get_value(ir::SsaDef(op));
}

Value *ParseContext::get_value(const ir::SsaDef &op) const
{
	auto itr = value_map.find(op);
	return itr == value_map.end() ? nullptr : itr->second;
}

BasicBlock *ParseContext::get_basic_block(ir::SsaDef ssa)
{
	auto &bb = bb_map[ssa];
	if (!bb)
		bb = context.construct<BasicBlock>(context);
	return bb;
}

ConstantInt *ParseContext::get_constant_uint(uint32_t value)
{
	return ConstantInt::get(Type::getInt32Ty(context), value);
}

ConstantAsMetadata *ParseContext::create_constant_uint_meta(uint32_t value)
{
	return create_constant_meta(get_constant_uint(value));
}

ConstantAsMetadata *ParseContext::create_constant_uint64_meta(uint32_t value)
{
	return create_constant_meta(ConstantInt::get(Type::getInt64Ty(context), value));
}

ConstantAsMetadata *ParseContext::create_constant_meta(Constant *c)
{
	return context.construct<ConstantAsMetadata>(&module, c);
}

MDString *ParseContext::create_string_meta(const String &str)
{
	return context.construct<MDString>(&module, str);
}

// Parses the highly simplified and SSA-ified IR coming from dxbc-spirv.
Module *parseDXBCIR(LLVMContext &context, ir::Builder &builder)
{
	auto *module = context.construct<Module>(context);
	ParseContext ctx(context, builder, *module);

	if (!ctx.emit_entry_point())
		return nullptr;
	if (!ctx.emit_metadata())
		return nullptr;
	if (!ctx.emit_function_bodies())
		return nullptr;

	return module;
}

Module *parseDXBCBinary(LLVMContext &context, const void *data, size_t size)
{
	ScopedLogger logger;

	dxbc::Converter::Options convertOptions = {};
	convertOptions.includeDebugNames = false;
	convertOptions.boundCheckShaderIo = true;

	ir::CompileOptions options = {};
	options.scratchOptions.enableBoundChecking = true;
	options.scratchOptions.resolveCbvCopy = false;
	options.scratchOptions.unpackConstantIndexedArrays = true;
	options.scratchOptions.unpackSmallArrays = true;
	options.arithmeticOptions.lowerDot = true;
	options.arithmeticOptions.lowerSinCos = false;
	options.arithmeticOptions.lowerMsad = true;
	options.arithmeticOptions.lowerF32toF16 = true;
	options.arithmeticOptions.lowerConvertFtoI = false;
	options.arithmeticOptions.lowerGsVertexCountIn = true;
	options.arithmeticOptions.hasNvUnsignedItoFBug = true;
	options.min16Options.enableFloat16 = true;
	options.min16Options.enableInt16 = true;
	options.resourceOptions.allowSubDwordScratchAndLds = false;
	options.resourceOptions.flattenLds = true;
	options.resourceOptions.flattenScratch = true;
	options.resourceOptions.structuredCbv = false;
	options.resourceOptions.structuredSrvUav = false;
	options.bufferOptions.useTypedForRaw = false;
	options.bufferOptions.useTypedForStructured = false;
	options.bufferOptions.useTypedForSparseFeedback = true;
	options.bufferOptions.useRawForTypedAtomic = false;
	options.scalarizeOptions.subDwordVectors = true;
	options.syncOptions.allowWorkgroupCoherence = false;
	options.syncOptions.insertRovLocks = false;
	options.syncOptions.insertLdsBarriers = false;
	options.syncOptions.insertUavBarriers = false;
	options.derivativeOptions.hoistNontrivialDerivativeOps = true;
	options.derivativeOptions.hoistNontrivialImplicitLodOps = false;
	options.derivativeOptions.hoistDescriptorLoads = false;
	options.cseOptions.relocateDescriptorLoad = false;
	options.descriptorIndexing.optimizeDescriptorIndexing = false;

	auto builder = dxbc::compileShaderToLegalizedIr(data, size, convertOptions, options);
	if (!builder)
		return nullptr;

	return parseDXBCIR(context, *builder);
}
}
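// Editor's note: a minimal usage sketch for the entry point above (variable
// names are hypothetical, and the enclosing namespace is assumed to be LLVMBC):
//
//   LLVMBC::LLVMContext context;
//   LLVMBC::Module *module =
//       LLVMBC::parseDXBCBinary(context, dxbc_data, dxbc_size);
//   if (!module)
//       LOGE("Failed to parse DXBC shader.\n");
//
// Since everything is allocated via context.construct(), the returned module
// presumably lives in the context's arena and must not outlive it.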
================================================
FILE: bc/type.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "type.hpp"
#include "cast.hpp"
#include "context.hpp"
#include <assert.h>

namespace LLVMBC
{
PointerType::PointerType(Type *type, uint32_t addr_space)
    : Type(type->getContext(), TypeID::PointerTyID)
    , contained_type(type)
{
	address_space = addr_space;
}

PointerType *PointerType::get(Type *pointee, unsigned addr_space)
{
	auto &context = pointee->getContext();
	auto &cache = context.get_type_cache();

	for (auto *type : cache)
	{
		if (type->getTypeID() == TypeID::PointerTyID)
		{
			auto *pointer_type = cast<PointerType>(type);
			if (pointer_type->getAddressSpace() == addr_space && pointer_type->getElementType() == pointee)
				return pointer_type;
		}
	}

	auto *type = context.construct<PointerType>(pointee, addr_space);
	cache.push_back(type);
	return type;
}

unsigned Type::getAddressSpace() const
{
	return address_space;
}

Type *PointerType::getElementType() const
{
	return contained_type;
}

ArrayType::ArrayType(Type *type, uint64_t elements_)
    : Type(type->getContext(), TypeID::ArrayTyID)
    , contained_type(type)
    , elements(elements_)
{
}

ArrayType *ArrayType::get(Type *element, uint64_t size)
{
	auto &context = element->getContext();
	auto &cache = context.get_type_cache();

	for (auto *type : cache)
	{
		if (type->getTypeID() == TypeID::ArrayTyID)
		{
			auto *array_type = cast<ArrayType>(type);
			if (array_type->getArrayNumElements() == size && array_type->getArrayElementType() == element)
				return array_type;
		}
	}

	auto *type = context.construct<ArrayType>(element, size);
	cache.push_back(type);
	return type;
}
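// Editor's note: get() uniques types by linearly scanning the context's type
// cache, so type equality elsewhere in the codebase reduces to pointer
// comparison. For example (ctx being any live LLVMContext):
//   Type *a = PointerType::get(Type::getInt32Ty(ctx), 0);
//   Type *b = PointerType::get(Type::getInt32Ty(ctx), 0);
//   assert(a == b); // same cached instance
// The O(n) scan is acceptable since a shader module only creates a handful of
// distinct types.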
VectorType::VectorType(LLVMBC::LLVMContext &context, unsigned vector_size_, LLVMBC::Type *type)
    : Type(context, TypeID::VectorTyID)
    , element_type(type)
    , vector_size(vector_size_)
{
}

unsigned VectorType::getVectorSize() const
{
	return vector_size;
}

Type *VectorType::getElementType() const
{
	return element_type;
}

VectorType *VectorType::get(unsigned vector_size, Type *element)
{
	auto &context = element->getContext();
	auto &cache = context.get_type_cache();

	for (auto *type : cache)
	{
		if (type->getTypeID() == TypeID::VectorTyID)
		{
			auto *vector_type = cast<VectorType>(type);
			if (vector_type->getVectorSize() == vector_size && vector_type->getElementType() == element)
				return vector_type;
		}
	}

	auto *type = context.construct<VectorType>(context, vector_size, element);
	cache.push_back(type);
	return type;
}

uint64_t Type::getArrayNumElements() const
{
	assert(type_id == TypeID::ArrayTyID);
	return cast<ArrayType>(this)->elements;
}

unsigned Type::getVectorNumElements() const
{
	assert(type_id == TypeID::VectorTyID);
	return cast<VectorType>(this)->getVectorSize();
}

unsigned Type::getPointerAddressSpace() const
{
	assert(type_id == TypeID::PointerTyID);
	return cast<PointerType>(this)->getAddressSpace();
}

Type *Type::getArrayElementType() const
{
	assert(type_id == TypeID::ArrayTyID);
	return cast<ArrayType>(this)->contained_type;
}

Type *Type::getStructElementType(unsigned index) const
{
	assert(type_id == TypeID::StructTyID);
	return cast<StructType>(this)->getElementType(index);
}

unsigned Type::getStructNumElements() const
{
	assert(type_id == TypeID::StructTyID);
	return cast<StructType>(this)->getNumElements();
}

unsigned Type::getIntegerBitWidth() const
{
	assert(type_id == TypeID::IntegerTyID);
	return cast<IntegerType>(this)->getBitWidth();
}

Type *Type::getPointerElementType() const
{
	assert(type_id == TypeID::PointerTyID);
	return cast<PointerType>(this)->getElementType();
}

StructType::StructType(LLVMContext &context, Vector<Type *> member_types_)
    : Type(context, TypeID::StructTyID)
    , member_types(std::move(member_types_))
{
}

unsigned StructType::getNumElements() const
{
	return member_types.size();
}

Type *StructType::getElementType(unsigned N) const
{
	assert(N < member_types.size());
	return member_types[N];
}

StructType *StructType::get(LLVMContext &context, Vector<Type *> member_types)
{
	auto &cache = context.get_type_cache();

	for (auto *type : cache)
	{
		if (type->getTypeID() == TypeID::StructTyID)
		{
			auto *struct_type = cast<StructType>(type);
			if (struct_type->getNumElements() == member_types.size())
			{
				bool equal = true;
				unsigned count = member_types.size();
				for (unsigned i = 0; i < count; i++)
				{
					if (member_types[i] != struct_type->getElementType(i))
					{
						equal = false;
						break;
					}
				}

				if (equal)
					return struct_type;
			}
		}
	}

	auto *type = context.construct<StructType>(context, std::move(member_types));
	cache.push_back(type);
	return type;
}

FunctionType::FunctionType(LLVMContext &context, Type *return_type_, Vector<Type *> argument_types_)
    : Type(context, TypeID::FunctionTyID)
    , return_type(return_type_)
    , argument_types(std::move(argument_types_))
{
}

unsigned FunctionType::getNumParams() const
{
	return unsigned(argument_types.size());
}

Type *FunctionType::getParamType(unsigned index) const
{
	assert(index < argument_types.size());
	return argument_types[index];
}

Type *FunctionType::getReturnType() const
{
	return return_type;
}

IntegerType::IntegerType(LLVMContext &context, uint32_t width_)
    : Type(context, TypeID::IntegerTyID)
    , width(width_)
{
}

uint32_t IntegerType::getBitWidth() const
{
	return width;
}

Type::Type(LLVMContext &context_, TypeID type_id_)
    : context(context_)
    , type_id(type_id_)
{
}

Type::TypeID Type::getTypeID() const
{
	return type_id;
}

Type *Type::getIntTy(LLVMContext &context, uint32_t width)
{
	auto &cache = context.get_type_cache();
	for (auto *type : cache)
		if (type->getTypeID() == TypeID::IntegerTyID && cast<IntegerType>(type)->getBitWidth() == width)
			return type;

	auto *type = context.construct<IntegerType>(context, width);
	cache.push_back(type);
	return type;
}

Type *Type::getTy(LLVMContext &context, TypeID id)
{
	auto &cache = context.get_type_cache();
	for (auto *type : cache)
		if (type->getTypeID() == id)
			return type;

	auto *type = context.construct<Type>(context, id);
	cache.push_back(type);
	return type;
}

Type *Type::getVoidTy(LLVMContext &context)
{
	return getTy(context, TypeID::VoidTyID);
}

Type *Type::getHalfTy(LLVMContext &context)
{
	return getTy(context, TypeID::HalfTyID);
}

Type *Type::getFloatTy(LLVMContext &context)
{
	return getTy(context, TypeID::FloatTyID);
}

Type *Type::getDoubleTy(LLVMContext &context)
{
	return getTy(context, TypeID::DoubleTyID);
}

Type *Type::getLabelTy(LLVMContext &context)
{
	return getTy(context, TypeID::LabelTyID);
}

Type *Type::getOpaqueTy(LLVMBC::LLVMContext &context)
{
	return getTy(context, TypeID::OpaqueTyID);
}
Type *Type::getMetadataTy(LLVMContext &context)
{
	return getTy(context, TypeID::MetadataTyID);
}

Type *Type::getInt1Ty(LLVMContext &context)
{
	return getIntTy(context, 1);
}

Type *Type::getInt8Ty(LLVMContext &context)
{
	return getIntTy(context, 8);
}

Type *Type::getInt16Ty(LLVMContext &context)
{
	return getIntTy(context, 16);
}

Type *Type::getInt32Ty(LLVMContext &context)
{
	return getIntTy(context, 32);
}

Type *Type::getInt64Ty(LLVMContext &context)
{
	return getIntTy(context, 64);
}

bool Type::isIntegerTy() const
{
	return type_id == TypeID::IntegerTyID;
}

bool Type::isFloatingPointTy() const
{
	return type_id == TypeID::HalfTyID || type_id == TypeID::FloatTyID || type_id == TypeID::DoubleTyID;
}

LLVMContext &Type::getContext()
{
	return context;
}
} // namespace LLVMBC


================================================
FILE: bc/type.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ #pragma once #include "data_structures.hpp" #include namespace LLVMBC { class LLVMContext; class Type { public: enum class TypeID { Unknown, VoidTyID, HalfTyID, FloatTyID, DoubleTyID, IntegerTyID, PointerTyID, ArrayTyID, StructTyID, FunctionTyID, LabelTyID, VectorTyID, MetadataTyID, OpaqueTyID }; Type(LLVMContext &context, TypeID type_id); LLVMContext &getContext(); TypeID getTypeID() const; static Type *getVoidTy(LLVMContext &context); static Type *getHalfTy(LLVMContext &context); static Type *getFloatTy(LLVMContext &context); static Type *getDoubleTy(LLVMContext &context); static Type *getInt1Ty(LLVMContext &context); static Type *getInt8Ty(LLVMContext &context); static Type *getInt16Ty(LLVMContext &context); static Type *getInt32Ty(LLVMContext &context); static Type *getInt64Ty(LLVMContext &context); static Type *getIntTy(LLVMContext &context, uint32_t width); static Type *getLabelTy(LLVMContext &context); static Type *getMetadataTy(LLVMContext &context); static Type *getOpaqueTy(LLVMContext &context); bool isIntegerTy() const; bool isFloatingPointTy() const; uint64_t getArrayNumElements() const; Type *getArrayElementType() const; Type *getPointerElementType() const; Type *getStructElementType(unsigned index) const; unsigned getStructNumElements() const; unsigned getIntegerBitWidth() const; unsigned getAddressSpace() const; unsigned getVectorNumElements() const; unsigned getPointerAddressSpace() const; protected: LLVMContext &context; TypeID type_id; static Type *getTy(LLVMContext &context, TypeID id); unsigned address_space = 0; }; class PointerType : public Type { public: static constexpr TypeID get_type_id() { return TypeID::PointerTyID; } PointerType(Type *type, unsigned addr_space); static PointerType *get(Type *pointee, unsigned addr_space); Type *getElementType() const; private: Type *contained_type = nullptr; }; class ArrayType : public Type { public: static constexpr TypeID get_type_id() { return TypeID::ArrayTyID; } ArrayType(Type *type, uint64_t elements); static ArrayType *get(Type *element, uint64_t size); private: friend class Type; Type *contained_type = nullptr; uint64_t elements = 0; }; class IntegerType : public Type { public: static constexpr TypeID get_type_id() { return TypeID::IntegerTyID; } IntegerType(LLVMContext &context, uint32_t width); uint32_t getBitWidth() const; private: uint32_t width = 0; }; class StructType : public Type { public: static constexpr TypeID get_type_id() { return TypeID::StructTyID; } StructType(LLVMContext &context, Vector member_types); static StructType *get(LLVMContext &context, Vector member_types); unsigned getNumElements() const; Type *getElementType(unsigned N) const; private: Vector member_types; }; class VectorType : public Type { public: static constexpr TypeID get_type_id() { return TypeID::VectorTyID; } VectorType(LLVMContext &context, unsigned vector_size, Type *type); static VectorType *get(unsigned vector_size, Type *type); unsigned getVectorSize() const; Type *getElementType() const; private: Type *element_type; unsigned vector_size; }; class FunctionType : public Type { public: static constexpr TypeID get_type_id() { return TypeID::FunctionTyID; } FunctionType(LLVMContext &context, Type *return_type, Vector argument_types); unsigned getNumParams() const; Type *getParamType(unsigned index) const; Type *getReturnType() const; private: Type *return_type = nullptr; Vector argument_types; }; } // namespace LLVMBC ================================================ FILE: bc/value.cpp 
================================================ /* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "value.hpp" #include "cast.hpp" #include "context.hpp" #include "instruction.hpp" #include "logging.hpp" #include "type.hpp" #include namespace LLVMBC { Value::Value(Type *type_, ValueKind kind_) : type(type_) , kind(kind_) { } void Value::set_tween_id(uint64_t id) { tween_id = id; } uint64_t Value::get_tween_id() const { return tween_id; } Type *Value::getType() const { return type; } ValueKind Value::get_value_kind() const { return kind; } Argument::Argument(Type *type, unsigned argument_number_) : Value(type, ValueKind::Argument), argument_number(argument_number_) { } unsigned Argument::getArgNo() const { return argument_number; } bool Constant::is_base_of_value_kind(ValueKind kind) { switch (kind) { case ValueKind::ConstantFP: case ValueKind::ConstantInt: case ValueKind::ConstantDataArray: case ValueKind::ConstantDataVector: case ValueKind::ConstantAggregate: case ValueKind::ConstantAggregateZero: case ValueKind::ConstantBase: case ValueKind::Undef: case ValueKind::Function: case ValueKind::Global: case ValueKind::ConstantExpr: return true; default: return false; } } Constant::Constant(Type *type, ValueKind kind) : Value(type, kind) { } ConstantInt *ConstantInt::get(Type *type, uint64_t value) { auto &context = type->getContext(); return context.construct(type, value); } const APInt &Constant::getUniqueInteger() const { return apint; } void Constant::set_integer(const APInt &apint_) { apint = apint_; } void Constant::set_float(const APFloat &apfloat_) { apfloat = apfloat_; } APInt::APInt(Type *type_, uint64_t value_) : type(type_) , value(value_) { } APFloat::APFloat(Type *type_, uint64_t value_) : type(type_) , value(value_) { } int64_t APInt::getSExtValue() const { auto width = cast(type)->getBitWidth(); if (width == 64) return int64_t(value); auto mask = (1ull << width) - 1; bool sign_bit = ((value >> (width - 1)) & 1) != 0; uint64_t extended = sign_bit ? 
~mask : 0ull; return int64_t((value & mask) | extended); } uint64_t APInt::getZExtValue() const { auto width = cast(type)->getBitWidth(); if (width == 64) return value; auto mask = (1ull << width) - 1u; return value & mask; } uint64_t APInt::get_raw_value() const { return value; } ConstantFP *ConstantFP::get(Type *type, uint64_t value) { auto &context = type->getContext(); return context.construct(type, value); } ConstantInt::ConstantInt(Type *type, uint64_t value) : Constant(type, ValueKind::ConstantInt) { set_integer(APInt(type, value)); } ConstantFP::ConstantFP(Type *type, uint64_t value) : Constant(type, ValueKind::ConstantFP) { set_float(APFloat(type, value)); } const APFloat &Constant::getValueAPF() const { return apfloat; } float APFloat::convertToFloat() const { switch (type->getTypeID()) { case Type::TypeID::FloatTyID: { float f; auto u = uint32_t(value); static_assert(sizeof(f) == sizeof(u), "Float is not 32-bit."); memcpy(&f, &u, sizeof(float)); return f; } case Type::TypeID::DoubleTyID: { double f; static_assert(sizeof(f) == sizeof(value), "Double is not 64-bit."); memcpy(&f, &value, sizeof(double)); return float(f); } default: LOGE("Unknown FP type in APFloat::convertToFloat().\n"); return 0.0f; } } APInt APFloat::bitcastToAPInt() const { Type *int_type = nullptr; switch (type->getTypeID()) { case Type::TypeID::HalfTyID: int_type = Type::getInt16Ty(type->getContext()); break; case Type::TypeID::FloatTyID: int_type = Type::getInt32Ty(type->getContext()); break; case Type::TypeID::DoubleTyID: int_type = Type::getInt64Ty(type->getContext()); break; default: break; } return { int_type, value }; } double APFloat::convertToDouble() const { switch (type->getTypeID()) { case Type::TypeID::FloatTyID: { float f; auto u = uint32_t(value); static_assert(sizeof(f) == sizeof(u), "Float is not 32-bit."); memcpy(&f, &u, sizeof(float)); return double(f); } case Type::TypeID::DoubleTyID: { double f; static_assert(sizeof(f) == sizeof(value), "Double is not 64-bit."); memcpy(&f, &value, sizeof(double)); return f; } default: LOGE("Unknown FP type in APFloat::convertToDouble().\n"); return 0.0f; } } UndefValue::UndefValue(Type *type) : Constant(type, ValueKind::Undef) { } UndefValue *UndefValue::get(Type *type) { auto &context = type->getContext(); return context.construct(type); } ConstantAggregateZero::ConstantAggregateZero(Type *type) : Constant(type, ValueKind::ConstantAggregateZero) { } ConstantPointerNull::ConstantPointerNull(Type *type) : Constant(type, ValueKind::ConstantPointerNull) { } ConstantDataArray::ConstantDataArray(Type *type, Vector elements_) : Constant(type, ValueKind::ConstantDataArray) , elements(std::move(elements_)) { } unsigned ConstantDataArray::getNumElements() const { return elements.size(); } Constant *ConstantDataArray::getElementAsConstant(unsigned index) const { return cast(elements[index]); } ConstantDataVector::ConstantDataVector(Type *type, Vector elements_) : Constant(type, ValueKind::ConstantDataVector) , elements(std::move(elements_)) { } unsigned ConstantDataVector::getNumElements() const { return elements.size(); } Constant *ConstantDataVector::getElementAsConstant(unsigned index) const { return cast(elements[index]); } ConstantAggregate::ConstantAggregate(Type *type, Vector elements_) : Constant(type, ValueKind::ConstantAggregate) , elements(std::move(elements_)) { } unsigned ConstantAggregate::getNumOperands() const { return elements.size(); } Constant *ConstantAggregate::getOperand(unsigned index) const { return cast(elements[index]); } 
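// Editor's note: a worked example of the manual sign extension in
// APInt::getSExtValue() above, for width = 4 and raw value = 0b1010:
//   mask     = (1ull << 4) - 1           = 0b1111
//   sign_bit = ((value >> 3) & 1) != 0   = true
//   extended = ~mask                     = 0xFFFF'FFFF'FFFF'FFF0
//   result   = (value & mask) | extended = 0xFFFF'FFFF'FFFF'FFFA = -6
// getZExtValue() on the same value just masks, giving 10.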
ConstantExpr::ConstantExpr(unsigned opcode_, Type *type, Vector elements_) : Constant(type, ValueKind::ConstantExpr) , opcode(opcode_) , elements(std::move(elements_)) { } unsigned ConstantExpr::getOpcode() const { return opcode; } Constant *ConstantExpr::getOperand(unsigned int N) const { return cast(elements[N]); } unsigned ConstantExpr::getNumOperands() const { return unsigned(elements.size()); } GlobalVariable::GlobalVariable(Type *type, LinkageTypes linkage_, bool is_const_) : Constant(type, ValueKind::Global) , linkage(linkage_) , is_const(is_const_) { } GlobalVariable::LinkageTypes GlobalVariable::getLinkage() const { return linkage; } bool GlobalVariable::hasInitializer() const { return initializer != nullptr; } Constant *GlobalVariable::getInitializer() const { return initializer; } void GlobalVariable::set_initializer(Constant *value) { initializer = value; } bool GlobalVariable::isConstant() const { return is_const; } } // namespace LLVMBC ================================================ FILE: bc/value.hpp ================================================ /* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #pragma once #include "data_structures.hpp" #include namespace LLVMBC { class Type; enum class ValueKind { Argument, Function, InstructionBase, ConstantBase, ConstantInt, ConstantFP, ConstantAggregateZero, ConstantPointerNull, ConstantAggregate, ConstantDataArray, ConstantDataVector, ConstantExpr, Undef, UnaryOperator, BinaryOperator, Call, CompareBase, FCmp, ICmp, BasicBlock, PHI, Cast, Select, ExtractValue, Alloca, GetElementPtr, Load, Store, AtomicRMW, AtomicCmpXchg, Return, Unreachable, Branch, Switch, Proxy, Global, ShuffleVector, ExtractElement, InsertElement, CompositeConstruct }; #define LLVMBC_DEFAULT_VALUE_KIND_IMPL \ static bool is_base_of_value_kind(ValueKind kind) \ { \ return get_value_kind() == kind; \ } class Value { public: Value(Type *type, ValueKind kind); Type *getType() const; ValueKind get_value_kind() const; void set_tween_id(uint64_t id); uint64_t get_tween_id() const; protected: Type *type; ValueKind kind; uint64_t tween_id = 0; }; class Argument : public Value { public: Argument(Type *type, unsigned argument_number); unsigned getArgNo() const; static constexpr ValueKind get_value_kind() { return ValueKind::Argument; } LLVMBC_DEFAULT_VALUE_KIND_IMPL private: unsigned argument_number; }; class APInt { public: APInt() = default; APInt(Type *type, uint64_t value); uint64_t getZExtValue() const; int64_t getSExtValue() const; // LLVMBC specific hack to make minprecision with signed ints work. // We need a sign-extended value which fortunately the DXIL emits, // but LLVM itself will mask off the bits for you. uint64_t get_raw_value() const; private: Type *type = nullptr; uint64_t value = 0; }; class APFloat { public: APFloat() = default; APFloat(Type *type, uint64_t value); float convertToFloat() const; double convertToDouble() const; APInt bitcastToAPInt() const; private: Type *type = nullptr; uint64_t value = 0; }; class Constant : public Value { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantBase; } Constant(Type *type, ValueKind kind); void set_integer(const APInt &apint); void set_float(const APFloat &apfloat); const APFloat &getValueAPF() const; const APInt &getUniqueInteger() const; static bool is_base_of_value_kind(ValueKind kind); private: APInt apint; APFloat apfloat; }; class ConstantInt : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantInt; } static ConstantInt *get(Type *type, uint64_t value); ConstantInt(Type *type, uint64_t value); LLVMBC_DEFAULT_VALUE_KIND_IMPL }; class ConstantFP : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantFP; } static ConstantFP *get(Type *type, uint64_t bits); ConstantFP(Type *type, uint64_t bits); LLVMBC_DEFAULT_VALUE_KIND_IMPL }; class ConstantAggregateZero : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantAggregateZero; } explicit ConstantAggregateZero(Type *type); LLVMBC_DEFAULT_VALUE_KIND_IMPL }; class ConstantPointerNull : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantPointerNull; } explicit ConstantPointerNull(Type *type); LLVMBC_DEFAULT_VALUE_KIND_IMPL }; class ConstantDataArray : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantDataArray; } ConstantDataArray(Type *type, Vector elements); unsigned getNumElements() const; Constant *getElementAsConstant(unsigned index) const; LLVMBC_DEFAULT_VALUE_KIND_IMPL private: Vector elements; }; class 
ConstantDataVector : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantDataVector; } ConstantDataVector(Type *type, Vector elements); unsigned getNumElements() const; Constant *getElementAsConstant(unsigned index) const; LLVMBC_DEFAULT_VALUE_KIND_IMPL private: Vector elements; }; class ConstantAggregate : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantAggregate; } ConstantAggregate(Type *type, Vector elements); unsigned getNumOperands() const; Constant *getOperand(unsigned index) const; LLVMBC_DEFAULT_VALUE_KIND_IMPL private: Vector elements; }; class ConstantExpr : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::ConstantExpr; } ConstantExpr(unsigned opcode, Type *type, Vector elements); unsigned getOpcode() const; unsigned getNumOperands() const; Constant *getOperand(unsigned N) const; LLVMBC_DEFAULT_VALUE_KIND_IMPL private: unsigned opcode; Vector elements; }; class UndefValue : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::Undef; } explicit UndefValue(Type *type); static UndefValue *get(Type *type); LLVMBC_DEFAULT_VALUE_KIND_IMPL }; class GlobalVariable : public Constant { public: static constexpr ValueKind get_value_kind() { return ValueKind::Global; } enum LinkageTypes { ExternalLinkage, InternalLinkage, AppendingLinkage }; explicit GlobalVariable(Type *type, LinkageTypes linkage, bool is_const); void set_initializer(Constant *value); Constant *getInitializer() const; bool hasInitializer() const; bool isConstant() const; LinkageTypes getLinkage() const; LLVMBC_DEFAULT_VALUE_KIND_IMPL private: Constant *initializer = nullptr; LinkageTypes linkage; bool is_const; }; } // namespace LLVMBC ================================================ FILE: build_dxc.sh ================================================ #!/bin/bash PROFILE=Release if [ ! -z $1 ]; then PROFILE=$1 fi if [ ! -z $2 ]; then NPROC="--parallel $2" fi echo "Building DXC." mkdir -p external/dxc-build cd external/dxc-build # CLANG_FORMAT_EXE=OFF avoids a broken build where it expects clang-format to produce exact results for some dumb reason. cmake ../DirectXShaderCompiler -DCMAKE_BUILD_TYPE=$PROFILE -C ../DirectXShaderCompiler/cmake/caches/PredefinedParams.cmake -G Ninja -DSPIRV_WERROR=OFF -DCLANG_FORMAT_EXE=OFF -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ cmake --build . --config $PROFILE ${NPROC} ================================================ FILE: cfg_structurizer.cpp ================================================ /* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "cfg_structurizer.hpp" #include "SpvBuilder.h" #include "logging.hpp" #include "node.hpp" #include "node_pool.hpp" #include "spirv_module.hpp" #include #include namespace dxil_spv { CFGStructurizer::CFGStructurizer(CFGNode *entry, CFGNodePool &pool_, SPIRVModule &module_) : entry_block(entry) , pool(pool_) , module(module_) { exit_block = pool.create_node(); exit_block->name = "EXIT"; } void CFGStructurizer::log_cfg_graphviz(const char *path) const { FILE *file = fopen(path, "w"); if (!file) { LOGE("Failed to open graphviz dump path: %s\n", path); return; } UnorderedMap node_to_id; uint32_t accum_id = 0; const auto get_node_id = [&](const CFGNode *node) -> uint32_t { auto itr = node_to_id.find(node); if (itr == node_to_id.end()) { const char *shape = nullptr; if (node->merge == MergeType::Loop) shape = "circle"; else if (node->merge == MergeType::Selection) shape = "triangle"; else shape = "box"; node_to_id[node] = ++accum_id; fprintf(file, "%u [label=\"%s\", shape=\"%s\"];\n", accum_id, node->name.c_str(), shape); return accum_id; } else return itr->second; }; fprintf(file, "digraph {\n"); for (auto index = forward_post_visit_order.size(); index; index--) { auto *node = forward_post_visit_order[index - 1]; switch (node->ir.terminator.type) { case Terminator::Type::Branch: fprintf(file, "%u -> %u;\n", get_node_id(node), get_node_id(node->ir.terminator.direct_block)); break; case Terminator::Type::Condition: fprintf(file, "%u -> %u;\n", get_node_id(node), get_node_id(node->ir.terminator.true_block)); fprintf(file, "%u -> %u;\n", get_node_id(node), get_node_id(node->ir.terminator.false_block)); break; case Terminator::Type::Switch: for (auto &c : node->ir.terminator.cases) fprintf(file, "%u -> %u;\n", get_node_id(node), get_node_id(c.node)); break; default: break; } if (node->merge == MergeType::Loop) { if (node->pred_back_edge) fprintf(file, "%u -> %u [style=\"dotted\"];\n", get_node_id(node), get_node_id(node->pred_back_edge)); if (node->loop_merge_block) fprintf(file, "%u -> %u [style=\"dashed\"];\n", get_node_id(node), get_node_id(node->loop_merge_block)); } else if (node->merge == MergeType::Selection) { if (node->selection_merge_block) fprintf(file, "%u -> %u [style=\"dashed\"];\n", get_node_id(node), get_node_id(node->selection_merge_block)); } } fprintf(file, "}\n"); fclose(file); } void CFGStructurizer::log_cfg(const char *tag) const { LOGI("\n======== %s =========\n", tag); for (auto index = forward_post_visit_order.size(); index; index--) { auto *node = forward_post_visit_order[index - 1]; LOGI("%s:\n", node->name.c_str()); switch (node->ir.terminator.type) { case Terminator::Type::Branch: LOGI(" Branch -> %s\n", node->ir.terminator.direct_block->name.c_str()); break; case Terminator::Type::Condition: LOGI(" Cond -> %s | %s\n", node->ir.terminator.true_block->name.c_str(), node->ir.terminator.false_block->name.c_str()); break; case Terminator::Type::Return: LOGI(" Return\n"); break; case Terminator::Type::Unreachable: LOGI(" Unreachable\n"); break; case Terminator::Type::Switch: LOGI(" Switch\n"); for (auto &c : node->ir.terminator.cases) { if (c.is_default) LOGI(" Default -> %s\n", c.node->name.c_str()); else LOGI(" Case %u -> %s\n", c.value, c.node->name.c_str()); } break; case Terminator::Type::Kill: 
LOGI(" Kill\n"); break; } switch (node->merge) { case MergeType::Selection: LOGI(" SelectionMerge -> %s\n", node->selection_merge_block ? node->selection_merge_block->name.c_str() : "N/A"); break; case MergeType::Loop: LOGI(" LoopMerge -> %s\n", node->loop_merge_block ? node->loop_merge_block->name.c_str() : "N/A"); LOGI(" Continue -> %s\n", node->pred_back_edge ? node->pred_back_edge->name.c_str() : "N/A"); break; default: break; } LOGI("\n"); } LOGI("\n=====================\n"); } //#define PHI_DEBUG #ifdef PHI_DEBUG static void validate_phi(const PHI &phi) { auto incomings = phi.incoming; std::sort(incomings.begin(), incomings.end(), [](const IncomingValue &a, const IncomingValue &b) { return a.block < b.block; }); auto itr = std::unique(incomings.begin(), incomings.end(), [](const IncomingValue &a, const IncomingValue &b) { return a.block == b.block; }); if (itr != incomings.end()) abort(); } static void validate_phi(const Vector &phis) { for (auto &phi : phis) validate_phi(phi); } #else #define validate_phi(phi) ((void)0) #endif void CFGStructurizer::eliminate_node_link_preds_to_succ(CFGNode *node) { assert(node->succ.size() == 1 || node->succ.size() == 2); #ifdef PHI_DEBUG for (auto *succ : node->succ) validate_phi(succ->ir.phi); #endif Vector break_nodes; auto pred_copy = node->pred; auto succ_copy = node->succ; for (auto *pred : pred_copy) { auto *break_node = pool.create_node(); break_node->name = node->name + ".break." + pred->name; break_node->ir.terminator = node->ir.terminator; for (auto *succ : succ_copy) break_node->add_branch(succ); break_node->immediate_post_dominator = node->immediate_post_dominator; break_node->immediate_dominator = pred; pred->retarget_branch(node, break_node); break_nodes.push_back(break_node); for (auto &phi : node->ir.phi) { for (auto &incoming : phi.incoming) { if (incoming.block == pred) { incoming.block = break_node; // We have no opcodes in this block, but we may depend on a PHI variable to do conditional branch. if (phi.id == break_node->ir.terminator.conditional_id) break_node->ir.terminator.conditional_id = incoming.id; } } } } assert(node->pred.empty()); for (auto *succ : node->succ) { for (auto &phi : succ->ir.phi) { // Find incoming ID from the block we're splitting up. auto incoming_itr = std::find_if(phi.incoming.begin(), phi.incoming.end(), [&](const IncomingValue &incoming) { return incoming.block == node; }); assert(incoming_itr != phi.incoming.end()); spv::Id incoming_from_node = incoming_itr->id; phi.incoming.erase(incoming_itr); // Try to see if the ID is a PHI that was generated by this block. auto outgoing_itr = std::find_if(node->ir.phi.begin(), node->ir.phi.end(), [&](const PHI &phi) { return phi.id == incoming_from_node; }); if (outgoing_itr != node->ir.phi.end()) { // If it was then we need to split up the PHI node. The break block will serve as a proxy // incoming block instead. phi.incoming.insert(phi.incoming.end(), outgoing_itr->incoming.begin(), outgoing_itr->incoming.end()); validate_phi(succ->ir.phi); } else { // A plain value is passed down to succ, most likely a constant which lives at global scope. // We know this block does not generate this ID, so it must be either a value generated at global scope // (constant), or a value created by a block which dominates this node, // which also means it dominates all preds to this node. 
for (auto *break_pred : break_nodes) phi.incoming.push_back({ break_pred, incoming_from_node }); validate_phi(succ->ir.phi); } } // Remove any lingering pred, since node is now unreachable, and if we do more transforms without // recomputing CFG, we'll add impossible PHI inputs. auto erase_itr = std::find(succ->pred.begin(), succ->pred.end(), node); if (erase_itr != succ->pred.end()) succ->pred.erase(erase_itr); } node->ir.phi.clear(); } bool CFGStructurizer::cleanup_breaking_return_constructs() { unsigned post_dominating_returns = 0; CFGNode *split_candidate = nullptr; for (auto *node : forward_post_visit_order) { if (node->ir.terminator.type != Terminator::Type::Return) continue; // If this block is only serving to return, it's meaningless to merge. // It will only complicate the CFG. if (node->ir.operations.empty() && node->num_forward_preds() > 1 && !node->post_dominates_any_work()) { split_candidate = node; } else { // If we're actually post-dominating other blocks, the split candidate is relevant. for (auto *pred : node->pred) { if (node->post_dominates(pred)) { post_dominating_returns++; break; } } } } // Only bother if we have more than one return and at least another return that is actually post-dominating // work. Avoids potential false positives. if (!post_dominating_returns) return false; if (split_candidate) { auto preds = split_candidate->pred; for (auto *pred : preds) { auto *dummy_return = pool.create_node(); dummy_return->name = split_candidate->name + ".dup"; dummy_return->immediate_dominator = split_candidate->immediate_dominator; dummy_return->immediate_post_dominator = exit_block; dummy_return->forward_post_visit_order = split_candidate->forward_post_visit_order; dummy_return->backward_post_visit_order = split_candidate->backward_post_visit_order; dummy_return->ir.terminator.type = Terminator::Type::Return; pred->retarget_branch(split_candidate, dummy_return); } // Iterate until we are done. recompute_cfg(); return true; } return false; } bool CFGStructurizer::block_is_breaking_phi_construct(const CFGNode *node) const { // Only bother with blocks which don't do anything useful work. // The only opcodes they should have are PHI nodes and a (conditional) branch. if (!node->ir.operations.empty()) return false; if (node->pred.size() <= 1) return false; // Don't bother with anything that could be considered load bearing. if (node->post_dominates_perfect_structured_construct()) return false; // Anything related to loop/continue blocks, we don't bother with. if (node->succ_back_edge || node->pred_back_edge) return false; if (node->succ.size() == 1) { if (node->ir.terminator.type != Terminator::Type::Branch) return false; } else if (node->succ.size() == 2) { if (node->ir.terminator.type != Terminator::Type::Condition) return false; } else return false; for (auto *succ : node->succ) { if (node->dominates(succ)) return false; // Checks if either the merge block or successor is sensitive to PHI somehow. if (!ladder_chain_has_phi_dependencies(succ, node)) return false; } // This is a merge block candidate for a loop, don't split. // It will only confuse things where we'll need to re-merge the split blocks anyways. for (auto *pred : node->pred) if (pred->succ_back_edge) return false; // A more complicated case where we want the block to remain as a ladder block. 
auto *loop_header = get_innermost_loop_header_for(node); if (loop_header && loop_header->pred_back_edge && loop_header->dominates(node) && loop_header->pred_back_edge->succ.empty()) { bool merge_is_outside_loop = !query_reachability(*node, *loop_header->pred_back_edge); if (merge_is_outside_loop) { auto *header_pdom = loop_header->pred_back_edge->immediate_post_dominator; // We only want to avoid the split when this is a meaningful ladder. // If the paths all end up in the same merge anyway, it's safer to split. for (auto *df : node->dominance_frontier) if (df == header_pdom) return true; for (auto *pdf : node->post_dominance_frontier) { // We can't reach, but the PDF can. We're confident we're a loop exit. if (query_reachability(*pdf, *loop_header->pred_back_edge)) return false; } } } return true; } void CFGStructurizer::cleanup_breaking_phi_constructs() { bool did_work = false; // There might be cases where we have a common break block from different scopes which only serves to PHI together some values // before actually breaking, and passing that PHI node on to the actual break block. // This causes problems because this looks very much like a merge, but it is actually not and forces validation errors. // Another case is where the succ block takes PHI nodes from the breaking block only, // which is relevant if only constants are somehow used in the PHI construct. for (size_t i = forward_post_visit_order.size(); i; i--) { auto *node = forward_post_visit_order[i - 1]; if (block_is_breaking_phi_construct(node)) { eliminate_node_link_preds_to_succ(node); did_work = true; } } if (did_work) recompute_cfg(); } static void scrub_rov_begin_lock(CFGNode *node, bool preserve_first_begin) { auto begin_itr = node->ir.operations.begin(); if (preserve_first_begin) { begin_itr = std::find_if(node->ir.operations.begin(), node->ir.operations.end(), [](const Operation *op) { return op->op == spv::OpBeginInvocationInterlockEXT; }); assert(begin_itr != node->ir.operations.end()); ++begin_itr; } auto itr = std::remove_if(begin_itr, node->ir.operations.end(), [](const Operation *op) { return op->op == spv::OpBeginInvocationInterlockEXT; }); node->ir.operations.erase(itr, node->ir.operations.end()); } static void scrub_rov_end_lock(CFGNode *node, bool preserve_last_end) { auto end_itr = node->ir.operations.end(); if (preserve_last_end) { for (size_t i = node->ir.operations.size(); i; i--) { size_t index = i - 1; auto &op = node->ir.operations[index]; if (op->op == spv::OpEndInvocationInterlockEXT) { end_itr = node->ir.operations.begin() + index; break; } } } auto itr = std::remove_if(node->ir.operations.begin(), end_itr, [](const Operation *op) { return op->op == spv::OpEndInvocationInterlockEXT; }); node->ir.operations.erase(itr, end_itr); } static void scrub_rov_lock_regions(CFGNode *node, bool preserve_first_begin, bool preserve_last_end) { scrub_rov_begin_lock(node, preserve_first_begin); scrub_rov_end_lock(node, preserve_last_end); } bool CFGStructurizer::find_single_entry_exit_lock_region( CFGNode *&idom, CFGNode *&pdom, const Vector &rov_blocks) { // If the lock region has multiple instances, i.e. a loop, give up right away, unless the construct is simple // and we can trivially do: // begin(); for(;;) {} end(); // For this to work, all ROV blocks must be contained by one loop. The must be a trivial input branch to the loop // header, and trivial exit out of the loop, i.e. one loop exit which is covered by the continue block. auto *outermost_loop_header = idom ? 
const_cast(get_innermost_loop_header_for(entry_block, idom)) : nullptr; while (outermost_loop_header && outermost_loop_header != entry_block) { auto *innermost_loop_header = const_cast( get_innermost_loop_header_for(entry_block, outermost_loop_header->immediate_dominator)); // Stop right before we hit the entry block. if (innermost_loop_header && innermost_loop_header != entry_block) outermost_loop_header = innermost_loop_header; else break; } if (idom && outermost_loop_header != entry_block) { // First, all ROV blocks must be inside the loop construct. for (auto *rov : rov_blocks) { if (!outermost_loop_header->dominates(rov) || !query_reachability(*rov, *outermost_loop_header->pred_back_edge)) { // Cannot promote directly. Can only promote if idom is entered once. return execution_path_is_single_entry_and_dominates_exit(idom, pdom); } } idom = outermost_loop_header; auto analysis = analyze_loop(outermost_loop_header); auto merge = analyze_loop_merge(outermost_loop_header, analysis); if (!merge.merge || !merge.dominated_merge || merge.infinite_continue_ladder || merge.merge != merge.dominated_merge) { return false; } else { pdom = merge.merge; } // We must insert the lock before entering loop. // This only works if we have exactly one pred and that pred directly branches to us. if (idom->pred.size() == 1 && idom->pred.front()->ir.terminator.type == Terminator::Type::Branch) idom = idom->pred.front(); else return false; } return true; } bool CFGStructurizer::execution_path_is_single_entry_and_dominates_exit(CFGNode *idom, CFGNode *pdom) { if (!idom->dominates_all_reachable_exits()) return false; pdom = CFGNode::find_common_post_dominator(pdom, idom); bool internal_early_return = !pdom || pdom->immediate_post_dominator == pdom; if (internal_early_return) return false; // If we're dominating all reachable exits despite being inside a loop, it's okay to use ROV as-is. // We have proven that this path will only be executed once per thread. // We will have to make sure that this exit path doesn't loop itself. // Just prove this by making sure there are no back-edges on the path from idom to pdom. // If there are back-edges that loop back to an earlier header, that is covered by dominates_all_reachable_exits. if (idom->pred_back_edge || !idom->dominates(pdom)) return false; while (pdom != idom) { if (pdom->pred_back_edge) return false; pdom = pdom->immediate_dominator; } return true; } void CFGStructurizer::flatten_subgroup_shuffles() { recompute_cfg(); // Look for cases where shuffles happen inside small branches. // This comes up due to HLSL's short-cicruit rules. for (auto *n : forward_post_visit_order) { // Only care about blocks which don't dominate anything. if (n->succ.size() != 1 || n->dominance_frontier.size() != 1 || n->dominance_frontier.front() != n->succ.front()) continue; if (n->pred.size() != 1) continue; if (!n->pred.front()->dominates(n->succ.front())) continue; if (n->pred.front()->succ.size() != 2) continue; // There's a limit to how much we want to peephole. if (n->ir.operations.size() > 4) continue; // We don't want to hoist if both sides of the branch have meaningful work associated with them. 
auto *succ = n->succ.front(); auto *pred = n->pred.front(); auto *sibling0 = pred->succ[0]; auto *sibling1 = pred->succ[1]; if (sibling0 != succ && sibling0 != n && !sibling0->ir.operations.empty()) continue; if (sibling1 != succ && sibling1 != n && !sibling1->ir.operations.empty()) continue; // Now we've detected: // if (blah) { a = shuffle(); } phi(a); bool has_dubious_shuffle = false; for (auto *op : n->ir.operations) { if (op->op == spv::OpGroupNonUniformShuffle || op->op == spv::OpGroupNonUniformBroadcast) { for (auto &phi : n->succ.front()->ir.phi) { for (auto &incoming : phi.incoming) { if (incoming.id == op->id) { has_dubious_shuffle = true; goto out; } } } } } out: if (has_dubious_shuffle) { // Now the question is if it's safe to do this. There can be nothing control dependent (except for shuffles). for (auto *op : n->ir.operations) { if (op->op == spv::OpGroupNonUniformShuffle || op->op == spv::OpGroupNonUniformBroadcast) continue; if (op->op == spv::OpLoad) { // Only allow loads if it's loading from plain OpVariables. // Hoisting a buffer read is not acceptable. if (!module.get_builder().hasDecoration(op->arguments[0], spv::DecorationBuiltIn)) { has_dubious_shuffle = false; break; } } if (SPIRVModule::opcode_is_control_dependent(op->op) || op->id == 0 || SPIRVModule::opcode_has_side_effect_and_result(op->op)) { has_dubious_shuffle = false; break; } } } if (has_dubious_shuffle) { for (auto *op : n->ir.operations) n->pred.front()->ir.operations.push_back(op); n->ir.operations.clear(); } } } void CFGStructurizer::rewrite_auto_group_shared_barrier() { recompute_cfg(); enum class Kind { None, Load, Store, Atomic }; struct Block { CFGNode *node; const CFGNode *innermost_loop; Kind pre_kind; Kind post_kind; }; // In linear traversal order, find all BBs that use group shared. Vector shared_blocks; for (size_t i = forward_post_visit_order.size(); i; i--) { auto *node = forward_post_visit_order[i - 1]; for (auto *op : node->ir.operations) { if ((op->flags & Operation::AutoGroupSharedBarrier) != 0) { shared_blocks.push_back({ node, get_innermost_loop_header_for(node), Kind::None, Kind::None }); break; } } } // Deal with intra-BB hazards. for (auto &block : shared_blocks) { Kind pending = Kind::None; // If we're the first BB to access shared, no need for a post block. // Similar for the last block. // Loops can complicate this analysis, but ... eh. // This is a workaround, not required by spec or anything. 
		for (auto *op : block.node->ir.operations)
		{
			if ((op->flags & Operation::AutoGroupSharedBarrier) != 0)
			{
				if (op->op == spv::OpLoad || op->op == spv::PseudoOpMaskedLoad)
				{
					if (pending != Kind::Load && pending != Kind::None)
						op->flags |= Operation::SubgroupSyncPre;
					pending = Kind::Load;
				}
				else if (op->op == spv::OpStore || op->op == spv::PseudoOpMaskedStore)
				{
					if (pending != Kind::Store && pending != Kind::None)
						op->flags |= Operation::SubgroupSyncPre;
					pending = Kind::Store;
				}
				else
				{
					if (pending != Kind::Atomic && pending != Kind::None)
						op->flags |= Operation::SubgroupSyncPre;
					pending = Kind::Atomic;
				}

				if (block.pre_kind == Kind::None)
					block.pre_kind = pending;
			}
		}

		block.post_kind = pending;
	}

	for (size_t i = 0; i < shared_blocks.size(); i++)
	{
		auto &first = shared_blocks[i];
		for (size_t j = i + 1; j < shared_blocks.size(); j++)
		{
			auto &second = shared_blocks[j];
			if (!query_reachability(*first.node, *second.node))
				continue;

			if (first.post_kind != second.pre_kind)
			{
				// Find an intermediate block which:
				// - post-dominates the first
				// - dominates the second
				// - has the maximal number of invocations
				// The subgroup barrier should be run with as many threads as possible.
				if (second.node->post_dominates(first.node))
					second.node->ir.operations.front()->flags |= Operation::SubgroupSyncPre;
				else if (first.node->dominates(second.node))
					first.node->ir.operations.back()->flags |= Operation::SubgroupSyncPost;
				else
				{
					// Try to find some intermediate node. If we cannot find it, just yolo in a barrier
					// somewhere. This is just a workaround, so if it doesn't work 100%, it's not a big deal.
					auto *pdom = first.node->immediate_post_dominator;
					while (pdom && query_reachability(*pdom, *second.node) && !pdom->dominates(second.node) &&
					       pdom->immediate_post_dominator && pdom->immediate_post_dominator != pdom)
					{
						pdom = pdom->immediate_post_dominator;
					}

					if (pdom && pdom != second.node)
					{
						if (pdom->ir.operations.empty())
						{
							auto *nop = module.allocate_op(spv::OpNop);
							nop->flags |= Operation::SubgroupSyncPost;
							pdom->ir.operations.push_back(nop);
						}
						else
							pdom->ir.operations.back()->flags |= Operation::SubgroupSyncPost;
					}
					else if (pdom == second.node)
					{
						second.node->ir.operations.front()->flags |= Operation::SubgroupSyncPre;
					}
				}

				// We've added appropriate barriers for this node now.
				second.pre_kind = Kind::None;
			}

			break;
		}

		// Analyze re-entrant code. We may depend on memory coming from an earlier loop iteration.
		if (first.pre_kind != Kind::None && first.innermost_loop != entry_block &&
		    first.innermost_loop->pred_back_edge)
		{
			bool has_complex_dependency = false;

			// Other blocks within the loop may require a dependency.
			for (size_t j = i + 1; j < shared_blocks.size() && !has_complex_dependency; j++)
			{
				if (query_reachability(*shared_blocks[j].node, *first.innermost_loop->pred_back_edge))
				{
					first.node->ir.operations.front()->flags |= Operation::SubgroupSyncPre;
					has_complex_dependency = true;
				}
			}

			if (!has_complex_dependency && first.pre_kind != first.post_kind)
			{
				// Self-dependency within the BB.
				first.node->ir.operations.back()->flags |= Operation::SubgroupSyncPost;
			}
		}
	}
}

bool CFGStructurizer::rewrite_rov_lock_region()
{
	recompute_cfg();

	// First, find all BBs that use ROV.
	Vector<CFGNode *> rov_blocks;
	for (auto *node : forward_post_visit_order)
	{
		for (auto &op : node->ir.operations)
		{
			if (op->op == spv::OpBeginInvocationInterlockEXT)
			{
				rov_blocks.push_back(node);
				break;
			}
		}
	}

	// If we declare ROVs but never actually use them ... *shrug*
	if (rov_blocks.empty())
		return true;

	// Rules: OpBegin and OpEnd must be dynamically called exactly once.
	// To simplify, we want to only emit one begin and one end that covers the entire shader.
	// Usually ROV access is constrained to a single BB as a simple case.
	// The simple BB case fails with control flow, e.g. a loop or conditional. In this case we must widen the range
	// of the lock such that: end post-dominates begin, and begin post-dominates entry.
	// If we cannot make this work, flag as non-trivial and wrap the entire shader in a big lock.
	auto *idom = rov_blocks.front();
	for (size_t i = 1; i < rov_blocks.size() && idom; i++)
		idom = CFGNode::find_common_dominator(idom, rov_blocks[i]);

	// Stretch scope as long as we don't post-dominate entry.
	while (idom && idom != entry_block && !idom->post_dominates(entry_block))
		idom = idom->immediate_dominator;

	auto *pdom = find_common_post_dominator(rov_blocks);

	if (!pdom || !idom || !find_single_entry_exit_lock_region(idom, pdom, rov_blocks) || !idom->dominates(pdom))
	{
		idom = nullptr;
		pdom = nullptr;
	}

	// Stretch post-dominator if we need to.
	if (idom && pdom)
		pdom = CFGNode::find_common_post_dominator(pdom, idom);

	bool internal_early_return = !pdom || pdom->immediate_post_dominator == pdom;

	// Non-trivial case.
	if (!idom || !pdom || internal_early_return)
	{
		for (auto *node : rov_blocks)
			scrub_rov_lock_regions(node, false, false);
		return false;
	}

	bool begin_block_has_lock = std::find(rov_blocks.begin(), rov_blocks.end(), idom) != rov_blocks.end();
	bool end_block_has_lock = std::find(rov_blocks.begin(), rov_blocks.end(), pdom) != rov_blocks.end();

	for (auto *node : rov_blocks)
		scrub_rov_lock_regions(node, node == idom, node == pdom);

	if (!begin_block_has_lock)
		idom->ir.operations.push_back(module.allocate_op(spv::OpBeginInvocationInterlockEXT));
	if (!end_block_has_lock)
		pdom->ir.operations.insert(pdom->ir.operations.begin(), module.allocate_op(spv::OpEndInvocationInterlockEXT));

	return true;
}

void CFGStructurizer::rewrite_multiple_back_edges()
{
	reset_traversal();
	visit_for_back_edge_analysis(*entry_block);
}

void CFGStructurizer::sink_ssa_constructs()
{
	sink_ssa_constructs_run(true);
	sink_ssa_constructs_run(false);
}

void CFGStructurizer::sink_ssa_constructs_run(bool dry_run)
{
	// First, propagate sinkability state to any operation that uses a sinkable SSA.
	// If an SSA expression is used in a BB, but that use of the SSA can be sunk, we need to
	// sink everything as a group.
	Vector<spv::Id> sinkable_ops;

	struct RewriteState
	{
		CFGNode *consumed_block;
		Operation *op;
	};
	UnorderedMap<spv::Id, RewriteState> sinks;

	for (auto *n : forward_post_visit_order)
	{
		sinkable_ops.clear();
		auto &ops = n->ir.operations;

		for (auto *op : ops)
		{
			if ((op->flags & Operation::SinkableBit) != 0)
			{
				sinkable_ops.push_back(op->id);
				sinks[op->id] = { nullptr, op };
			}
			else if (op->id && !SPIRVModule::opcode_is_control_dependent(op->op) &&
			         !SPIRVModule::opcode_has_side_effect_and_result(op->op))
			{
				// We cannot sink any opcode which is control dependent, or has side effects.
				for (uint32_t i = 0; i < op->num_arguments; i++)
				{
					if ((op->literal_mask & (1u << i)) != 0)
						continue;

					spv::Id consumed_id = op->arguments[i];
					if (std::find(sinkable_ops.begin(), sinkable_ops.end(), consumed_id) != sinkable_ops.end())
					{
						sinkable_ops.push_back(op->id);
						op->flags |= Operation::DependencySinkableBit;
						sinks[op->id] = { nullptr, op };
						break;
					}
				}
			}
			else if (op->op == spv::OpControlBarrier || op->op == spv::OpMemoryBarrier)
			{
				// We cannot sink beyond this barrier. Invalidate every sinkable op we saw so far.
				for (spv::Id id : sinkable_ops)
				{
					auto *op_ptr = sinks[id].op;
					assert(op_ptr);
					op_ptr->flags &= ~(Operation::SinkableBit | Operation::DependencySinkableBit);
				}
				sinkable_ops.clear();
			}
		}
	}

	// If an expression is used as a PHI input assume we cannot sink.
	// It gets a bit awkward to deal with this, and it's not required for this workaround pass.
	for (auto *n : forward_post_visit_order)
	{
		for (auto &phi : n->ir.phi)
		{
			for (auto &incoming : phi.incoming)
			{
				auto itr = sinks.find(incoming.id);
				if (itr != sinks.end())
				{
					auto *op_ptr = itr->second.op;
					assert(op_ptr);
					op_ptr->flags &= ~(Operation::SinkableBit | Operation::DependencySinkableBit);
				}
			}
		}
	}

	const auto consume_id = [&](spv::Id consumed_id, CFGNode *n) {
		auto itr = sinks.find(consumed_id);
		if (itr != sinks.end())
		{
			if (!itr->second.consumed_block)
				itr->second.consumed_block = n;
			else if (itr->second.consumed_block != n)
				itr->second.op->flags &= ~(Operation::SinkableBit | Operation::DependencySinkableBit);
		}
	};

	const auto path_is_reorderable = [&](const CFGNode *src, const CFGNode *dst) {
		// There cannot be any control or memory barriers along the way, or we have to be conservative.

		// There is absolutely no point in sinking if dst ends up post-dominating src anyway.
		// We cannot avoid any bug from happening.
		if (dst->post_dominates(src))
			return false;

		// Never sink into a loop.
		if (dst->pred_back_edge)
			return false;

		// Could deal with multiple preds, but we mostly just care about trivial sinks.
		if (dst->pred.size() > 1)
			return false;

		dst = dst->immediate_dominator;
		while (src != dst)
		{
			if (dst->pred.size() > 1 || dst->pred_back_edge)
				return false;
			for (auto *op : dst->ir.operations)
				if (op->op == spv::OpControlBarrier || op->op == spv::OpMemoryBarrier)
					return false;
			dst = dst->pred.front();
		}

		// We reached src, and we validated that block already when deciding on what is sinkable or not, so we're good.
		return true;
	};

	// Walk all instructions in reverse order.
	// We can sink an instruction if:
	// - An ID was only consumed in a BB != generating BB.
	// The consumed BB must be unique for us to consider it for simplicity.
	for (auto *n : forward_post_visit_order)
	{
		if (n->ir.terminator.type == Terminator::Type::Condition || n->ir.terminator.type == Terminator::Type::Switch)
		{
			consume_id(n->ir.terminator.conditional_id, n);
		}
		else if (n->ir.terminator.type == Terminator::Type::Return && n->ir.terminator.return_value != 0)
		{
			consume_id(n->ir.terminator.return_value, n);
		}

		auto &ops = n->ir.operations;
		for (size_t i = ops.size(); i; i--)
		{
			auto *op = ops[i - 1];
			auto *target_block = n;

			if (op->id && (op->flags & (Operation::SinkableBit | Operation::DependencySinkableBit)) != 0)
			{
				auto sink_itr = sinks.find(op->id);
				if (sink_itr != sinks.end() && sink_itr->second.consumed_block &&
				    sink_itr->second.consumed_block != n &&
				    path_is_reorderable(n, sink_itr->second.consumed_block))
				{
					// Move the operation to the beginning of the consumed block instead.
					target_block = sink_itr->second.consumed_block;

					// Don't actually move the instruction until we have confirmed the entire chain can be sunk,
					// otherwise this exercise is meaningless.
					if (!dry_run)
					{
						target_block->ir.operations.insert(target_block->ir.operations.begin(), op);
						ops.erase(ops.begin() + int(i - 1));
					}
				}
				else
				{
					// This failed to sink. Remember this for the next run.
					op->flags &= ~Operation::SinkableBit;
				}
			}

			// Mark uses after we have sunk the instruction. This allows us to sink a chain of SSA instructions.
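			// (E.g., illustratively: if %a is Sinkable and %b = OpIAdd %a %c became
			// DependencySinkable, sinking %b first records %b's new block as %a's
			// consumer, so %a can follow on this same reverse walk.)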
			for (uint32_t j = 0; j < op->num_arguments; j++)
				if ((op->literal_mask & (1u << j)) == 0)
					consume_id(op->arguments[j], target_block);
		}
	}

	if (dry_run)
		for (auto *n : forward_post_visit_order)
			for (auto *op : n->ir.operations)
				op->flags &= ~Operation::DependencySinkableBit;
}

void CFGStructurizer::propagate_branch_control_hints()
{
	for (auto *n : forward_post_visit_order)
	{
		if (n->pred_back_edge)
		{
			if (n->pred_back_edge->ir.terminator.force_loop)
				n->ir.merge_info.loop_control_mask = spv::LoopControlDontUnrollMask;
			else if (n->pred_back_edge->ir.terminator.force_unroll)
				n->ir.merge_info.loop_control_mask = spv::LoopControlUnrollMask;
		}

		if (n->ir.terminator.type == Terminator::Type::Condition)
		{
			if (n->ir.terminator.force_flatten)
				n->ir.merge_info.selection_control_mask = spv::SelectionControlFlattenMask;
			else if (n->ir.terminator.force_branch)
				n->ir.merge_info.selection_control_mask = spv::SelectionControlDontFlattenMask;
		}

		// Both are possible if a selection construct is also a loop header.
	}
}

void CFGStructurizer::remove_unused_ssa()
{
	UnorderedSet<spv::Id> removed_ids;
	UnorderedSet<spv::Id> used_ids;

	for (auto *node : forward_post_visit_order)
	{
		for (auto &phi : node->ir.phi)
			for (auto &incoming : phi.incoming)
				used_ids.insert(incoming.id);

		for (auto *op : node->ir.operations)
			for (unsigned i = 0; i < op->num_arguments; i++)
				if ((op->literal_mask & (1u << i)) == 0)
					used_ids.insert(op->arguments[i]);

		if (node->ir.terminator.conditional_id)
			used_ids.insert(node->ir.terminator.conditional_id);
		if (node->ir.terminator.type == Terminator::Type::Return && node->ir.terminator.return_value != 0)
		{
			used_ids.insert(node->ir.terminator.return_value);
		}
	}

	for (auto *node : forward_post_visit_order)
	{
		node->ir.phi.erase(std::remove_if(node->ir.phi.begin(), node->ir.phi.end(),
		                                  [&](const PHI &phi) { return used_ids.count(phi.id) == 0; }),
		                   node->ir.phi.end());

		node->ir.operations.erase(
		    std::remove_if(node->ir.operations.begin(), node->ir.operations.end(),
		                   [&](const Operation *op) {
			                   bool ret = op->id != 0 && !SPIRVModule::opcode_has_side_effect_and_result(op->op) &&
			                              used_ids.count(op->id) == 0;
			                   if (ret)
				                   removed_ids.insert(op->id);
			                   return ret;
		                   }),
		    node->ir.operations.end());
	}

	module.get_builder().removeDecorations(removed_ids);
}

bool CFGStructurizer::rewrite_impossible_back_edges()
{
	bool did_rewrite = false;

	for (auto *node : forward_post_visit_order)
	{
		if (!node->succ_back_edge)
			continue;

		// Make sure that the continue block in question branches to the innermost loop header.
		// If this is not the case, it is not a valid structured CFG.
		// In unstructured CFG, as long as the continue block cannot reach the back-edge of any inner loop constructs,
		// it's technically not considered part of their loops, even if the loops dominate it.
		// Utter nonsense ... >_<
		// The only viable solution is to transpose out the continue block and use ladder selection
		// to resolve the control flow.
		auto *header = get_innermost_loop_header_for(node);
		if (header == node->succ_back_edge)
			continue;

		// Make sure that we're in valid unstructured control flow. Our node cannot reach any back edge on the way,
		// meaning it's okay to transpose code. If the continue block can reach us, it means we're already
		// outside the loop, stop any attempt to transpose.
		const auto validate_header_suitability = [this, node](const CFGNode *header) {
			return !query_reachability(*node, *header->pred_back_edge) &&
			       !query_reachability(*header->pred_back_edge, *node);
		};

		// Find a more appropriate place to put it.
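		// (Illustrative: an outer loop header H1 containing an inner loop H2, with a
		// node dominated by H2 whose back edge targets H1, is exactly this case; the
		// continue edge must be transposed out of H2's scope.)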
		// We want to rewrite the flow so that the continue block lives outside any inner scopes.
		// The succ of the outer continue block is appropriate.
		const CFGNode *next_header;
		while ((next_header = get_innermost_loop_header_for(header->immediate_dominator)) != node->succ_back_edge &&
		       validate_header_suitability(next_header))
		{
			header = next_header;
		}

		if (next_header != node->succ_back_edge || !validate_header_suitability(header))
			continue;

		auto *outer_continue = header->pred_back_edge;

		// The outer continue must have a normal succ.
		if (outer_continue->succ.size() != 1)
			continue;

		// This succ is now in the loop scope of node->succ_back_edge. We can do the continue construct here.
		auto *succ = outer_continue->succ.front();
		auto *ladder = create_helper_pred_block(succ);
		auto orig_preds = node->pred;

		traverse_dominated_blocks_and_rewrite_branch(node->succ_back_edge, node, ladder);
		rewrite_ladder_conditional_branch_from_incoming_blocks(
		    ladder, node, succ,
		    [&orig_preds](const CFGNode *n) {
			    return std::find(orig_preds.begin(), orig_preds.end(), n) != orig_preds.end();
		    },
		    "transpose_continue_phi");

		did_rewrite = true;
		break;
	}

	if (did_rewrite)
		recompute_cfg();

	return did_rewrite;
}

bool CFGStructurizer::run_trivial()
{
	recompute_cfg();
	sink_ssa_constructs();
	propagate_branch_control_hints();

	// Remove unused SSA ops in this path.
	remove_unused_ssa();
	return true;
}

bool CFGStructurizer::run()
{
	String graphviz_path;
	if (const char *env = getenv("DXIL_SPIRV_GRAPHVIZ_PATH"))
		graphviz_path = env;

	// We make the assumption during traversal that there is only one back edge.
	// Fix this up here.
	rewrite_multiple_back_edges();

	//log_cfg("Input state");

	if (!graphviz_path.empty())
	{
		reset_traversal();
		visit(*entry_block);
		auto graphviz_input = graphviz_path + ".input";
		log_cfg_graphviz(graphviz_input.c_str());
	}

	recompute_cfg();

	sink_ssa_constructs();
	propagate_branch_control_hints();
	cleanup_breaking_phi_constructs();

	if (!graphviz_path.empty())
	{
		auto graphviz_split = graphviz_path + ".phi-split";
		log_cfg_graphviz(graphviz_split.c_str());
	}

	while (cleanup_breaking_return_constructs())
	{
		if (!graphviz_path.empty())
		{
			auto graphviz_split = graphviz_path + ".break-return";
			log_cfg_graphviz(graphviz_split.c_str());
		}
	}

	create_continue_block_ladders();

	while (serialize_interleaved_early_returns())
	{
		if (!graphviz_path.empty())
		{
			auto graphviz_split = graphviz_path + ".serialize-early-return";
			log_cfg_graphviz(graphviz_split.c_str());
		}
	}

	while (serialize_interleaved_merge_scopes_aggressive())
	{
		if (!graphviz_path.empty())
		{
			auto graphviz_split = graphviz_path + ".serialize-aggressive";
			log_cfg_graphviz(graphviz_split.c_str());
		}
	}

	while (serialize_interleaved_merge_scopes())
	{
		if (!graphviz_path.empty())
		{
			auto graphviz_split = graphviz_path + ".serialize";
			log_cfg_graphviz(graphviz_split.c_str());
		}
	}

	split_merge_scopes();
	recompute_cfg();
	//log_cfg("Split merge scopes");

	if (!graphviz_path.empty())
	{
		auto graphviz_split = graphviz_path + ".split";
		log_cfg_graphviz(graphviz_split.c_str());
	}

	// We will have generated lots of ladder blocks
	// which might cause issues with further analysis, so
	// nuke them as required.
	eliminate_degenerate_blocks();

	if (!graphviz_path.empty())
	{
		auto graphviz_split = graphviz_path + ".eliminate0";
		log_cfg_graphviz(graphviz_split.c_str());
	}

	// Similar to cleanup_breaking_phi_constructs() in spirit,
	// but here we are forced to duplicate code blocks to make it work.
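	// (Illustratively: a breaking merge block with two or more preds that still
	// carries real code gets cloned once per pred by duplicate_node(), giving each
	// clone a single, unambiguous path to the merge target.)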
	duplicate_impossible_merge_constructs();
	//log_cfg("Split impossible merges");

	if (!graphviz_path.empty())
	{
		auto graphviz_split = graphviz_path + ".duplicate";
		log_cfg_graphviz(graphviz_split.c_str());
	}

	while (rewrite_transposed_loops())
	{
		if (!graphviz_path.empty())
		{
			auto graphviz_split = graphviz_path + ".transpose-loop-rewrite";
			log_cfg_graphviz(graphviz_split.c_str());
		}
	}

	// If there are back-edges that punch through multiple loop headers, fix this up.
	while (rewrite_impossible_back_edges())
	{
		if (!graphviz_path.empty())
		{
			auto graphviz_split = graphviz_path + ".impossible-continue";
			log_cfg_graphviz(graphviz_split.c_str());
		}
	}

	//LOGI("=== Structurize pass ===\n");
	while (structurize(0))
	{
		recompute_cfg();
		if (!graphviz_path.empty())
		{
			auto graphviz_final = graphviz_path + ".partial-struct0";
			log_cfg_graphviz(graphviz_final.c_str());
		}
	}

	update_structured_loop_merge_targets();
	//log_cfg("Structurize pass 0");

	if (!graphviz_path.empty())
	{
		auto graphviz_final = graphviz_path + ".struct0";
		log_cfg_graphviz(graphviz_final.c_str());
	}

	// We will have generated lots of ladder blocks
	// which might cause issues with further analysis, so
	// nuke them as required.
	eliminate_degenerate_blocks();
	//log_cfg("Split merge scopes");

	if (!graphviz_path.empty())
	{
		auto graphviz_split = graphviz_path + ".eliminate1";
		log_cfg_graphviz(graphviz_split.c_str());
	}

	//LOGI("=== Structurize pass ===\n");
	structurize(1);

	if (!graphviz_path.empty())
	{
		auto graphviz_final = graphviz_path + ".struct1";
		log_cfg_graphviz(graphviz_final.c_str());
	}

	bool need_restructure = false;
	while (rewrite_invalid_loop_breaks())
	{
		if (!graphviz_path.empty())
		{
			auto graphviz_final = graphviz_path + ".loop-break-rewrite";
			log_cfg_graphviz(graphviz_final.c_str());
		}
		need_restructure = true;
	}

	if (need_restructure)
	{
		// Need to redo the final structurization pass if we end up here.
		structurize(1);
	}

	//log_cfg("Final");
	if (!graphviz_path.empty())
	{
		auto graphviz_final = graphviz_path + ".final";
		log_cfg_graphviz(graphviz_final.c_str());
	}

	insert_phi();

	return true;
}

CFGNode *CFGStructurizer::get_entry_block() const
{
	return entry_block;
}

static bool block_is_control_dependent(const CFGNode *node)
{
	for (auto *op : node->ir.operations)
		if (SPIRVModule::opcode_is_control_dependent(op->op))
			return true;
	return false;
}

bool CFGStructurizer::continue_block_can_merge(CFGNode *node) const
{
	const CFGNode *pred_candidate = nullptr;
	auto *header = node->succ_back_edge;

	// This algorithm is very arbitrary and should be seen as a nasty heuristic which solves real shaders
	// we see in the wild. It's probably safe to block continue merge in far more cases than this, but we
	// want to be maximally convergent as often as we can.

	for (auto *pred : node->pred)
	{
		// This is the merge block of another inner loop, we really need an intermediate merge.
		if (pred->succ_back_edge && header != pred->succ_back_edge && header->dominates(pred->succ_back_edge))
			return true;
	}

	// Plain continue block that does nothing useful. No point merging this.
	// A continue block's succ is sometimes used to aid analysis and simplify other passes,
	// use terminator here explicitly.
	if (node->ir.operations.empty() && node->ir.terminator.type == Terminator::Type::Branch)
		return false;

	if (header->ir.terminator.type == Terminator::Type::Switch)
	{
		// If the loop header is also a switch statement, there can be some nasty edge cases.
		// We likely never intend for the continue block to be maximally convergent here
		// if the natural merge block is not the continue block.
		auto *merge = find_common_post_dominator(header->succ);
		auto *natural_merge = find_natural_switch_merge_block(header, merge);
		if (merge == node && natural_merge != merge)
			return false;
	}

	for (auto *pred : node->pred)
	{
		// If we have a situation where a continue block has a pred which is itself a selection merge target, that
		// block is the merge target where we follow maximum convergence.
		// The candidate must be inside the loop body and not the header itself.
		// Neither continue block nor merge target have any dominance relationship.
		if (pred->num_forward_preds() >= 2 && pred->succ.size() >= 2 &&
		    header != pred && !pred->dominates(node) && !node->post_dominates(pred))
		{
			// If execution does not merge up right at the natural break block,
			// things will get very complicated.
			// In practice, we can handle merges as long as the candidate just breaks out normally.
			// If not, we have to introduce ladder breaking and this is (almost) impossible to get right.
			auto *common_post_dominator = CFGNode::find_common_post_dominator(node, pred);
			if (common_post_dominator &&
			    std::find(node->succ.begin(), node->succ.end(), common_post_dominator) == node->succ.end())
			{
				pred_candidate = pred;
			}
		}

		// If we have a situation where a switch block inside our loop uses the continue block
		// as a continue target, it's important that we keep this block as a continue block,
		// otherwise, it will complicate the switch block greatly.
		if (pred->ir.terminator.type == Terminator::Type::Switch && !node->post_dominates(pred))
			return false;
	}

	// No obviously nasty case to handle, probably safe to let the algorithm do its thing ...
	if (!pred_candidate)
		return true;

	// Need to find another escape edge which is neither header nor the candidate.
	bool found_another_escape_edge = false;
	for (auto *pred : node->pred)
	{
		if (pred != header && pred != pred_candidate && !pred->dominates(node))
		{
			found_another_escape_edge = true;
			break;
		}
	}

	// If we have yet another escape edge, we probably cannot merge to continue ...
	return !found_another_escape_edge;
}

void CFGStructurizer::create_continue_block_ladders()
{
	// It does not seem to be legal to merge directly to continue blocks.
	// To make it possible to merge execution, we need to create a ladder block which we can merge to.
	// There are certain scenarios where it is impossible to merge to a continue block.
	// In this case, we will abandon maximum convergence and use the continue block as a "break"-like target.
	bool need_recompute_cfg = false;
	for (auto *node : forward_post_visit_order)
	{
		if (block_is_plain_continue(node) && continue_block_can_merge(node))
		{
			//LOGI("Creating helper pred block for continue block: %s\n", node->name.c_str());
			create_helper_pred_block(node);
			need_recompute_cfg = true;
		}
	}

	if (need_recompute_cfg)
		recompute_cfg();
}

void CFGStructurizer::update_structured_loop_merge_targets()
{
	// First, we need to do this before recomputing the CFG, since we lose
	// normal loop merge targets when recomputing.
	structured_loop_merge_targets.clear();
	for (auto *node : forward_post_visit_order)
	{
		if (node->loop_merge_block)
			structured_loop_merge_targets.insert(node->loop_merge_block);
		if (node->loop_ladder_block)
			structured_loop_merge_targets.insert(node->loop_ladder_block);
	}

	recompute_cfg();

	// Make sure we include merge blocks which are frozen merge targets in ladder blocks, which
	// were not included in the post visit order yet.
	for (auto *node : forward_post_visit_order)
	{
		if (node->loop_merge_block)
			structured_loop_merge_targets.insert(node->loop_merge_block);
		if (node->loop_ladder_block)
			structured_loop_merge_targets.insert(node->loop_ladder_block);
	}
}

static spv::Id get_remapped_id_for_duplicated_block(spv::Id id, const UnorderedMap<spv::Id, spv::Id> &remap)
{
	auto itr = remap.find(id);
	if (itr != remap.end())
		return itr->second;
	else
		return id;
}

Operation *CFGStructurizer::duplicate_op(Operation *op, UnorderedMap<spv::Id, spv::Id> &id_remap)
{
	Operation *duplicated_op;
	if (op->id)
		duplicated_op = module.allocate_op(op->op, module.allocate_id(), op->type_id);
	else
		duplicated_op = module.allocate_op(op->op);

	for (unsigned i = 0; i < op->num_arguments; i++)
	{
		if (op->literal_mask & (1u << i))
			duplicated_op->add_literal(op->arguments[i]);
		else
			duplicated_op->add_id(get_remapped_id_for_duplicated_block(op->arguments[i], id_remap));
	}

	if (op->id)
		id_remap[op->id] = duplicated_op->id;

	return duplicated_op;
}

bool CFGStructurizer::can_duplicate_phis(const CFGNode *node)
{
	// If we want to duplicate nodes, we cannot do so in complicated scenarios where
	// we need to resolve PHIs. For example, if a node is split, the split nodes might have to
	// insert PHI nodes covering the subset of nodes which can reach each split.
	// This gets very hairy, very quickly.
	// To check this, ensure that the node we want to split does not require any complex PHI handling.

	// First, validate that we can even find incoming values properly.
	for (auto *pred : node->pred)
	{
		for (auto &phi : node->ir.phi)
		{
			auto itr = find_incoming_value(pred, phi.incoming);
			if (itr == phi.incoming.end())
				return false;
		}
	}

	// Then, make sure that every incoming value dominates at least one pred of node.
	// This way, we know that we don't need complicated PHI frontier merges along the way.
	for (auto &phi : node->ir.phi)
	{
		for (auto &incoming : phi.incoming)
		{
			bool dominates_at_least_one_pred = false;
			for (auto *pred : node->pred)
			{
				if (incoming.block->dominates(pred))
				{
					dominates_at_least_one_pred = true;
					break;
				}
			}

			if (!dominates_at_least_one_pred)
				return false;
		}
	}

	return true;
}

void CFGStructurizer::duplicate_node(CFGNode *node)
{
	Vector<UnorderedMap<spv::Id, spv::Id>> rewritten_ids;
	assert(node->succ.size() == 1);
	assert(node->pred.size() >= 2);
	assert(!node->dominates(node->succ.front()));

	Vector<CFGNode *> break_blocks(node->pred.size());
	rewritten_ids.resize(node->pred.size());

	auto *succ = node->succ.front();
	auto tmp_pred = node->pred;
	for (size_t i = 0, n = tmp_pred.size(); i < n; i++)
	{
		auto *pred = tmp_pred[i];
		auto &remap = rewritten_ids[i];

		// First, rewrite PHI inputs.
		// Since we only have one pred now, we can resolve PHIs directly.
		auto *block = pool.create_node();
		block->name = node->name + ".dup." + pred->name;
		block->ir.terminator.type = Terminator::Type::Branch;
		block->ir.terminator.direct_block = succ;
		block->immediate_post_dominator = succ;
		block->immediate_dominator = pred;
		pred->retarget_branch(node, block);
		block->add_branch(succ);

		for (auto &phi : node->ir.phi)
		{
			auto itr = find_incoming_value(pred, phi.incoming);
			assert(itr != phi.incoming.end());
			remap[phi.id] = itr->id;
		}

		UnorderedSet<spv::Id> remove_decoration_ids;

		for (auto *op : node->ir.operations)
		{
			auto *dup_op = duplicate_op(op, remap);
			bool nocontract = module.get_builder().hasDecoration(op->id, spv::DecorationNoContraction);
			if (nocontract)
			{
				remove_decoration_ids.insert(op->id);
				module.get_builder().addDecoration(dup_op->id, spv::DecorationNoContraction);
			}
			block->ir.operations.push_back(dup_op);
		}

		module.get_builder().removeDecorations(remove_decoration_ids);
		break_blocks[i] = block;
	}

	assert(node->pred.empty());

	// Finally, look at succ. If it takes PHI inputs from node, we'll have to rewrite the PHIs.
	// We know that node does not dominate succ,
	// so succ cannot use any SSA variables node generated directly
	// without using PHI nodes.

	// We might have placed ladders in between so that we need to fixup PHI later than just plain succ.
	// Chase down the chain and replace all PHIs.

	// First, collect all the succs that we are supposed to examine.
	// The list should also include succ_back_edge because it is not in the succ chain after recompute_cfg.
	Vector<CFGNode *> succs;
	while (succ)
	{
		if (succ->succ_back_edge)
			succs.push_back(succ->succ_back_edge);
		succs.push_back(succ);
		if (succ->succ.size() == 1)
			succ = succ->succ.front();
		else
			succ = nullptr;
	}

	for (auto *succ : succs)
	{
		bool done = false;
		for (auto &phi : succ->ir.phi)
		{
			// Find incoming ID from the block we're splitting up.
			auto incoming_itr = std::find_if(phi.incoming.begin(), phi.incoming.end(),
			                                 [&](const IncomingValue &incoming) { return incoming.block == node; });

			if (incoming_itr != phi.incoming.end())
			{
				spv::Id incoming_from_node = incoming_itr->id;
				phi.incoming.erase(incoming_itr);

				for (size_t i = 0, n = tmp_pred.size(); i < n; i++)
				{
					auto &remap = rewritten_ids[i];
					phi.incoming.push_back({ break_blocks[i],
					                         get_remapped_id_for_duplicated_block(incoming_from_node, remap) });
				}

				// We've found the block we wanted to rewrite, terminate loop now.
				done = true;
			}
		}

		if (done)
			break;
	}
}

void CFGStructurizer::duplicate_impossible_merge_constructs()
{
	Vector<CFGNode *> duplicate_queue;
	for (size_t i = forward_post_visit_order.size(); i; i--)
	{
		auto *node = forward_post_visit_order[i - 1];

		// Never duplicate back-edges.
		if (node->succ_back_edge)
			continue;

		// Check for breaking merge blocks which were not considered degenerate.
		// This can happen if we actually have code in the breaking construct ... (scary!)
		// We'll have to split this block somehow.
		// If the candidate has control dependent effects like barriers and such,
		// this will likely break completely,
		// but I don't see how that would work on native drivers either ...

		// WARNING: This check is EXTREMELY sensitive and microscopic changes to the implementation
		// will dramatically affect codegen.
		bool breaking = merge_candidate_is_on_breaking_path(node);
		if (breaking && !node->ir.operations.empty() && !block_is_control_dependent(node))
			duplicate_queue.push_back(node);
	}

	if (duplicate_queue.empty())
		return;

	for (auto *node : duplicate_queue)
	{
		if (!can_duplicate_phis(node))
		{
			// A block could be subtly load bearing, in that if we split the node, it becomes impossible to resolve
			// PHIs and we hit assertions in duplicate_node().
			// This means the block is probably load bearing after all, and we should not split it.
			// Normally, we only want to break up blocks which have fairly trivial PHI resolves.
			LOGW("Was asked to duplicate node %s, but cannot split phis without crashing ...\n", node->name.c_str());
			continue;
		}

		duplicate_node(node);
	}

	recompute_cfg();
}

bool CFGStructurizer::ladder_chain_has_phi_dependencies(const CFGNode *succ, const CFGNode *node)
{
	while (succ)
	{
		for (auto &phi : succ->ir.phi)
			for (auto &incoming : phi.incoming)
				if (incoming.block == node)
					return true;

		if (succ->succ.size() == 1)
			succ = succ->succ.front();
		else
			succ = nullptr;
	}

	return false;
}

void CFGStructurizer::eliminate_degenerate_blocks()
{
	// After we create ladder blocks, we will likely end up with a lot of blocks which don't do much.
	// We might also have created merge scenarios which should *not* merge, i.e. cleanup_breaking_phi_constructs(),
	// except we caused it ourselves.
	// Eliminate bottom-up. First eliminate B, in A -> B -> C, where B contributes nothing.
	bool did_work = false;
	for (auto *node : forward_post_visit_order)
	{
		if (node->ir.operations.empty() && node->ir.phi.empty() &&
		    !node->pred_back_edge && !node->succ_back_edge && !node->is_pseudo_back_edge &&
		    node->succ.size() == 1 && node->ir.terminator.type == Terminator::Type::Branch &&
		    node->merge == MergeType::None &&
		    // Loop merge targets are sacred, and must not be removed.
		    structured_loop_merge_targets.count(node) == 0 &&
		    !ladder_chain_has_phi_dependencies(node->succ.front(), node))
		{
			auto check_is_load_bearing_continue_succ = [node](const CFGNode *n) {
				if (!n->succ_back_edge)
					return false;

				// If we eliminate the block, we want the succ to post-dominate the header,
				// so it can be considered a merge block.
				// Similarly, we want the header to dominate the succ.
				if (!node->succ.front()->post_dominates(n->succ_back_edge))
					return true;
				if (!n->succ_back_edge->dominates(node->succ.front()))
					return true;

				// No point in eliminating since we're inside the construct.
				if (n->dominates(node))
					return true;

				return false;
			};

			// If any pred is a continue block, this block is also load-bearing, since it can be used as a merge block.
			// Even if a continue block branches to us, it may be a fake load bearing block.
			// If the succ of node post-dominates the entire loop construct, we can eliminate the block safely
			// since we're not taking away a nice merge target.
			if (std::find_if(node->pred.begin(), node->pred.end(), check_is_load_bearing_continue_succ) !=
			    node->pred.end())
				continue;

			// We might be a viable merge target for an infinite loop. If we only have one pred, we're probably not
			// a painful break merge. Removing this block shouldn't be problematic for correctness, but removing
			// a block only to add back a ladder is a little silly.
			if (node->pred.size() == 1 && node->pred.front()->pred_back_edge &&
			    node->pred.front()->pred_back_edge->succ.empty())
				continue;

			// If any succ is a continue block, this block is also load-bearing, since it can be used as a merge block
			// (merge-to-continue ladder).
			if (std::find_if(node->succ.begin(), node->succ.end(),
			                 [](const CFGNode *n) { return n->succ_back_edge != nullptr; }) != node->succ.end())
			{
				continue;
			}

			auto *succ = node->succ.front();
			if (node->pred.size() == 1 && node->post_dominates(node->pred.front()))
			{
				// Trivial case.
				did_work = true;
				auto *pred = node->pred.front();
				pred->retarget_branch(node, succ);
				pred->dominance_frontier.clear();

				// Propagates any idom information up to pred if pred dominates succ.
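				// (Illustrative: for A -> B -> C where B is an empty ladder block with A
				// as its only pred, A is retargeted straight to C and only the dominance
				// frontiers of A and C need refreshing.)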
				recompute_dominance_frontier(succ);
				recompute_dominance_frontier(pred);
			}
			else if (merge_candidate_is_inside_continue_construct(node) || merge_candidate_is_on_breaking_path(node))
			{
				// If we have two or more preds, we have to be really careful.
				// If this node is on a breaking path, without being important for merging control flow,
				// it is fine to eliminate the block.
				did_work = true;

				auto tmp_pred = node->pred;
				for (auto *pred : tmp_pred)
					pred->retarget_branch_with_intermediate_node(node, node->succ.front());

				// Iteratively, we need to recompute the dominance frontier for all preds.
				// When we eliminate nodes like this, we might cause the pred blocks to become degenerate in
				// future iterations in this loop.
				std::sort(tmp_pred.begin(), tmp_pred.end(), [](const CFGNode *a, const CFGNode *b) {
					return a->forward_post_visit_order < b->forward_post_visit_order;
				});

				// Need to compute dominance frontiers from inside out.
				for (auto *pred : tmp_pred)
				{
					pred->dominance_frontier.clear();
					recompute_dominance_frontier(pred);
				}
			}
		}
	}

	if (did_work)
		recompute_cfg();
}

void CFGStructurizer::prune_dead_preds()
{
	// We do not want to see unreachable preds.
	// Having a pred means we need to map it to an incoming value when dealing with PHI.
	for (auto *node : forward_post_visit_order)
	{
		auto itr = std::remove_if(node->pred.begin(), node->pred.end(),
		                          [&](const CFGNode *node) { return reachable_nodes.count(node) == 0; });
		node->pred.erase(itr, node->pred.end());
	}
}

static void rewrite_consumed_ids(IRBlock &ir, spv::Id from, spv::Id to)
{
	for (auto *op : ir.operations)
	{
		for (unsigned i = 0; i < op->num_arguments; i++)
		{
			if ((op->literal_mask & (1u << i)) == 0)
				if (op->arguments[i] == from)
					op->arguments[i] = to;
		}
	}

	if (ir.terminator.conditional_id == from)
		ir.terminator.conditional_id = to;
	if (ir.terminator.return_value == from)
		ir.terminator.return_value = to;
}

void CFGStructurizer::fixup_loop_header_undef_phis()
{
	auto &builder = module.get_builder();
	recompute_cfg();

	// If the incoming value to the loop is undef, something is deeply wrong.
	// This is almost a guarantee that we will consume the value as undef, causing breakage in the wild.
	// Observed in Dune.
	for (auto *node : forward_post_visit_order)
	{
		if (!node->pred_back_edge)
			continue;

		for (auto &phi : node->ir.phi)
			for (auto &incoming : phi.incoming)
				if (incoming.block != node && incoming.block->dominates(node))
					if (const auto *inst = builder.getInstruction(incoming.id))
						if (inst->getOpCode() == spv::OpUndef)
							incoming.id = builder.makeNullConstant(phi.type_id);
	}
}

static bool type_class_is_opaque(spv::Op type_op)
{
	return type_op == spv::OpTypeImage || type_op == spv::OpTypeSampler ||
	       type_op == spv::OpTypeAccelerationStructureKHR;
}

void CFGStructurizer::fixup_broken_value_dominance()
{
	struct Origin
	{
		CFGNode *node;
		spv::Id type_id;
		const Operation *rematerialize_op;
	};
	UnorderedMap<spv::Id, Origin> origin;
	UnorderedMap<spv::Id, Vector<CFGNode *>> id_to_non_local_consumers;

	// First, scan through all blocks and figure out which block creates an ID.
	for (auto *node : forward_post_visit_order)
	{
		for (auto *op : node->ir.operations)
		{
			// OpVariable is always hoisted to function entry or above.
			// It can never not have dominance relationship.
			if (op->op != spv::OpVariable && op->id)
				origin[op->id] = { node, op->type_id, op->op == spv::OpSampledImage ? op : nullptr };
		}

		for (auto &phi : node->ir.phi)
			origin[phi.id] = { node, phi.type_id, nullptr };
	}

	const auto sort_unique_node_vector = [](Vector<CFGNode *> &nodes) {
		// Fixup nodes in order.
		std::sort(nodes.begin(), nodes.end(), [](const CFGNode *a, const CFGNode *b) -> bool {
			return a->forward_post_visit_order > b->forward_post_visit_order;
		});
		nodes.erase(std::unique(nodes.begin(), nodes.end()), nodes.end());
	};

	const auto mark_node_value_access = [&](CFGNode *node, spv::Id id) {
		auto origin_itr = origin.find(id);
		if (origin_itr == origin.end())
			return;

		auto *origin_node = origin_itr->second.node;
		if (!origin_node->dominates(node) || (origin_itr->second.rematerialize_op && node != origin_node))
		{
			// We have a problem. Mark that we need to rewrite a certain variable.
			id_to_non_local_consumers[id].push_back(node);
		}
	};

	// Need value copy here since we might be updating node->ir.operations inline leading to iterator invalidation.
	Vector<Operation> variable_pointer_like_operations[2];

	// Now, scan through all blocks and figure out which values are consumed in different blocks.
	for (auto *node : forward_post_visit_order)
	{
		for (auto *op : node->ir.operations)
		{
			auto literal_mask = op->literal_mask;
			for (unsigned i = 0; i < op->num_arguments; i++)
				if (((1u << i) & literal_mask) == 0)
					mark_node_value_access(node, op->arguments[i]);

			if (op->op == spv::OpLoad && type_class_is_opaque(module.get_builder().getTypeClass(op->type_id)))
				variable_pointer_like_operations[0].push_back(*op);
			else if (op->op == spv::OpAccessChain)
				variable_pointer_like_operations[1].push_back(*op);
		}

		// Incoming PHI values are handled elsewhere by modifying the incoming block to the creating block.
		// Ignore these kinds of usage here.

		if (node->ir.terminator.conditional_id != 0)
			mark_node_value_access(node, node->ir.terminator.conditional_id);
		if (node->ir.terminator.return_value != 0)
			mark_node_value_access(node, node->ir.terminator.return_value);
	}

	// First, sink any opaque objects which are accessed in unexpected blocks after CFG rewrite.
	for (auto &rewrite_ordering : variable_pointer_like_operations)
	{
		for (auto &variable_op : rewrite_ordering)
		{
			auto itr = id_to_non_local_consumers.find(variable_op.id);
			if (itr != id_to_non_local_consumers.end())
			{
				// We will need to sink the operation.
				// Make sure all dependencies are also marked as used in potentially non-local block.

				// Sort for deterministic output.
				Vector<CFGNode *> local_consumers_sorted;
				for (auto *non_local_node : itr->second)
					local_consumers_sorted.push_back(non_local_node);
				std::sort(local_consumers_sorted.begin(), local_consumers_sorted.end(),
				          [](const CFGNode *a, const CFGNode *b) {
					          return a->forward_post_visit_order < b->forward_post_visit_order;
				          });

				auto literal_mask = variable_op.literal_mask;
				for (unsigned i = 0; i < variable_op.num_arguments; i++)
					if (((1u << i) & literal_mask) == 0)
						for (auto *non_local_node : local_consumers_sorted)
							mark_node_value_access(non_local_node, variable_op.arguments[i]);

				for (auto *non_local_node : local_consumers_sorted)
				{
					auto *sunk_chain = module.allocate_op();
					*sunk_chain = variable_op;
					sunk_chain->id = module.allocate_id();
					if (module.get_builder().hasDecoration(variable_op.id, spv::DecorationNonUniform))
						module.get_builder().addDecoration(sunk_chain->id, spv::DecorationNonUniform);

					auto &ops = non_local_node->ir.operations;
					rewrite_consumed_ids(non_local_node->ir, variable_op.id, sunk_chain->id);
					ops.insert(ops.begin(), sunk_chain);
				}
			}
		}
	}

	// Resolve these broken PHIs by using OpVariable. It is the simplest solution, and this is a very rare case to begin with.
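	// Sketch of that rewrite (illustrative SPIR-V, not emitted verbatim):
	//   A: %v = ...              ; definition which no longer dominates a use in B
	//   A: OpStore %var %v       ; %var is a Function-storage OpVariable
	//   B: %v2 = OpLoad %var     ; uses of %v in B are rewritten to %v2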
	struct Rewrite
	{
		spv::Id id;
		const Vector<CFGNode *> *consumers;
	};
	Vector<Rewrite> rewrites;
	rewrites.reserve(id_to_non_local_consumers.size());

	for (auto &pair : id_to_non_local_consumers)
	{
		sort_unique_node_vector(pair.second);
		rewrites.push_back({ pair.first, &pair.second });
	}

	// Ensure ordering so that output remains stable.
	std::sort(rewrites.begin(), rewrites.end(), [](const Rewrite &a, const Rewrite &b) { return a.id < b.id; });

	for (auto &rewrite : rewrites)
	{
		auto &orig = origin[rewrite.id];

		// We don't rely on VariablePointers, so if this comes up, we need to figure out something else.
		// These kinds of ops are handled specially by re-creating them as needed.
		bool rematerialized = module.get_builder().isPointerType(orig.type_id) ||
		                      type_class_is_opaque(module.get_builder().getTypeClass(orig.type_id));

		if (orig.rematerialize_op)
		{
			auto *rematerialize_op = module.allocate_op();
			*rematerialize_op = *orig.rematerialize_op;
			rematerialize_op->id = module.allocate_id();
			if (module.get_builder().hasDecoration(orig.rematerialize_op->id, spv::DecorationNonUniform))
				module.get_builder().addDecoration(rematerialize_op->id, spv::DecorationNonUniform);

			for (auto *consumer : *rewrite.consumers)
			{
				rewrite_consumed_ids(consumer->ir, rewrite.id, rematerialize_op->id);
				consumer->ir.operations.insert(consumer->ir.operations.begin(), rematerialize_op);
			}
		}
		else if (!rematerialized)
		{
			// Invalid access chains are resolved above. We end up rewriting any non-dominated values instead.
			spv::Id alloca_var_id = module.create_variable(spv::StorageClassFunction, orig.type_id);

			auto *store_op = module.allocate_op(spv::OpStore);
			store_op->add_id(alloca_var_id);
			store_op->add_id(rewrite.id);
			orig.node->ir.operations.push_back(store_op);

			// For every non-local node which consumes ID, we load from the alloca'd variable instead.
			// Rewrite all ID references to point to the loaded value.
			for (auto *consumer : *rewrite.consumers)
			{
				spv::Id loaded_id = module.allocate_id();
				auto *load_op = module.allocate_op(spv::OpLoad, loaded_id, orig.type_id);
				load_op->add_id(alloca_var_id);
				rewrite_consumed_ids(consumer->ir, rewrite.id, loaded_id);
				consumer->ir.operations.insert(consumer->ir.operations.begin(), load_op);
			}
		}
	}
}

void CFGStructurizer::insert_phi()
{
	// If we inserted dummy branches from back-edge to rewrite infinite loops, we must prune these branches
	// now, so we don't end up creating the wrong number of PHI incoming values.
	// We don't have to recompute the CFG since we don't really care about post-visit orders at this stage.
	for (auto *node : forward_post_visit_order)
	{
		if (node->pred_back_edge &&
		    node->pred_back_edge->ir.terminator.type == Terminator::Type::Branch &&
		    node->pred_back_edge->succ_back_edge == node->pred_back_edge->ir.terminator.direct_block &&
		    node->pred_back_edge->succ.size() == 1)
		{
			auto *back_edge = node->pred_back_edge;
			auto *succ = back_edge->succ.front();
			back_edge->succ.clear();

			auto itr = std::find(succ->pred.begin(), succ->pred.end(), back_edge);
			assert(itr != succ->pred.end());
			succ->pred.erase(itr);
			succ->recompute_immediate_dominator();
		}
	}

	prune_dead_preds();

	// It is possible that an SSA value was created in a block, and consumed in another.
	// With CFG rewriting branches, it is possible that dominance relationship no longer holds
	// and we must insert new dummy IDs to resolve this.
	fixup_broken_value_dominance();

	// Build a map of value ID -> creating block.
	// This allows us to detect if a value is consumed in a situation where the declaration does not dominate use.
	// This can happen when introducing ladder blocks or similar.
	for (auto *node : forward_post_visit_order)
	{
		unsigned phi_index = 0;
		for (auto &phi : node->ir.phi)
		{
			phi_nodes.push_back({ node, phi_index });
			if (phi.id)
				value_id_to_block[phi.id] = node;
			phi_index++;
		}

		for (auto *op : node->ir.operations)
			if (op->id)
				value_id_to_block[op->id] = node;
	}

	// Resolve phi-nodes top-down since PHI nodes may depend on other PHI nodes.
	std::sort(phi_nodes.begin(), phi_nodes.end(), [](const PHINode &a, const PHINode &b) {
		return a.block->forward_post_visit_order > b.block->forward_post_visit_order;
	});

	for (auto &phi_node : phi_nodes)
	{
		fixup_phi(phi_node);
		insert_phi(phi_node);
	}
}

Vector<IncomingValue>::const_iterator CFGStructurizer::find_incoming_value(
    const CFGNode *frontier_pred, const Vector<IncomingValue> &incoming)
{
	// Find the incoming block which dominates frontier_pred and has the lowest post visit order.
	// There are cases where two or more blocks dominate, but we want the most immediate dominator.
	auto candidate = incoming.end();

	for (auto itr = incoming.begin(); itr != incoming.end(); ++itr)
	{
		auto *block = itr->block;
		if (block->dominates(frontier_pred))
		{
			if (candidate == incoming.end() ||
			    (block->forward_post_visit_order < candidate->block->forward_post_visit_order))
				candidate = itr;
		}
	}

	return candidate;
}

static IncomingValue *phi_incoming_blocks_find_block(Vector<IncomingValue> &incomings, const CFGNode *block)
{
	for (auto &incoming : incomings)
		if (incoming.block == block)
			return &incoming;
	return nullptr;
}

static bool id_is_generated_by_block(const CFGNode *block, spv::Id id)
{
	for (const auto *op : block->ir.operations)
		if (op->id == id)
			return true;

	for (const auto &phi : block->ir.phi)
		if (phi.id == id)
			return true;

	return false;
}

static void retarget_phi_incoming_block(PHI &phi, CFGNode *from, CFGNode *to)
{
	auto *value = phi_incoming_blocks_find_block(phi.incoming, from);
	if (value)
		value->block = to;
}

void CFGStructurizer::fixup_phi(PHINode &node)
{
	// We want to move any incoming block to where the ID was created.
	// This avoids some problematic cases of crossing edges when using ladders.
	auto &phi = node.block->ir.phi[node.phi_index];
	auto &incomings = phi.incoming;

	for (auto &incoming : incomings)
	{
		auto itr = value_id_to_block.find(incoming.id);
		if (itr == end(value_id_to_block))
		{
			// This is a global.
			continue;
		}

		auto *source_block = itr->second;

		// Only hoist PHI inputs if there used to be a dominance relationship in the original CFG,
		// but there no longer is.
		if (!source_block->dominates(incoming.block))
		{
			bool hoist_incoming = true;

			if (phi_incoming_blocks_find_block(incomings, source_block) != nullptr)
			{
				// Sanity check. This would create ambiguity.
				hoist_incoming = false;
			}

			// Don't hoist PHI inputs across the loop header boundary.
			if (incoming.block->succ_back_edge && query_reachability(*source_block, *incoming.block->succ_back_edge))
			{
				// If this happens somehow, we have a problem. It's a bit unclear how this is supposed to work.
				// It's possible we'd need to synthesize a fake input to back-edge which can be resolved
				// in a code path that does dominate the loop ...
				LOGW("Incoming value to back edge does not dominate loop header.\n");
				hoist_incoming = false;
			}

			if (hoist_incoming)
			{
#ifdef PHI_DEBUG
				LOGI("For node %s, move incoming node %s to %s.\n", node.block->name.c_str(),
				     incoming.block->name.c_str(), itr->second->name.c_str());
#endif
				incoming.block = itr->second;
			}
			else
			{
				// We cannot hoist, so need to use dummy OpVariable instead.
				spv::Id alloca_var_id = module.create_variable(spv::StorageClassFunction, phi.type_id, "phi_fixup");

				auto *store_op = module.allocate_op(spv::OpStore);
				store_op->add_id(alloca_var_id);
				store_op->add_id(incoming.id);
				itr->second->ir.operations.push_back(store_op);

				spv::Id loaded_id = module.allocate_id();
				auto *load_op = module.allocate_op(spv::OpLoad, loaded_id, phi.type_id);
				load_op->add_id(alloca_var_id);
				incoming.block->ir.operations.push_back(load_op);
				incoming.id = loaded_id;
			}

			validate_phi(node.block->ir.phi[node.phi_index]);
		}
	}
}

bool CFGStructurizer::can_complete_phi_insertion(const PHI &phi, const CFGNode *block)
{
	// If all incoming values have at least one pred block they dominate, we can merge the final PHI.
	auto &incoming_values = phi.incoming;
	for (auto &incoming : incoming_values)
	{
		auto itr = std::find_if(block->pred.begin(), block->pred.end(),
		                        [&](const CFGNode *n) { return incoming.block->dominates(n); });

		if (itr == block->pred.end() && (!block->pred_back_edge || !incoming.block->dominates(block->pred_back_edge)))
		{
			return false;
		}
	}

	return true;
}

bool CFGStructurizer::query_reachability_through_back_edges(const CFGNode &from, const CFGNode &to) const
{
	if (to.dominates(&from))
	{
		// If we're dominated by the end node, the only way we can reach it is through a back edge.
		return to.pred_back_edge && query_reachability(from, *to.pred_back_edge);
	}
	else
		return query_reachability(from, to);
}

bool CFGStructurizer::query_reachability_split_loop_header(const CFGNode &from, const CFGNode &to,
                                                           const CFGNode &end_node) const
{
	// A special query where from and to must lie on the same side of a loop header to be considered reachable.
	if (!end_node.pred_back_edge)
		return query_reachability(from, to);

	bool from_reaches_header = query_reachability(from, end_node);
	bool to_reaches_header = query_reachability(to, end_node);
	if (from_reaches_header != to_reaches_header)
		return false;

	return query_reachability(from, to);
}

bool CFGStructurizer::phi_frontier_makes_forward_progress(const PHI &phi, const CFGNode *frontier,
                                                          const CFGNode *end_node) const
{
	// Not all PHI frontiers are nodes we need to care about.
	// There are two conditions we must meet to disregard a placement:
	// - We do not remove any inputs as a result.
	// - The frontier can reach another incoming value.
	// In this situation, a frontier is completely meaningless.

	auto &incoming = phi.incoming;
	for (auto &incoming_value : incoming)
	{
		auto *incoming_block = incoming_value.block;

		// We will remove an input, this is forward progress.
		// Avoid checking the edge case where frontier candidate == incoming block.
		// Removing an input only to place a new frontier there is nonsensical.
		if (frontier != incoming_block &&
		    !exists_path_in_cfg_without_intermediate_node(incoming_block, end_node, frontier))
			return true;
	}

	// Nothing is removed as a result, so check if the frontier can reach another incoming value.
	// If end_node is a loop header, make sure we only consider a node visible if both are on the correct side of the
	// loop header.
	for (auto &incoming_value : incoming)
		if (query_reachability_split_loop_header(*frontier, *incoming_value.block, *end_node))
			return false;

	// Assume we make forward progress. Either way, we will never look at a frontier twice,
	// so this should be safe. The only real risk is that we add some redundant PHI nodes.
	return true;
}

void CFGStructurizer::insert_phi(PHINode &node)
{
	// We start off with N values defined in N blocks.
	// These N blocks *used* to branch to the PHI node, but due to our structurizer,
	// there might not be branch targets here anymore; the primary example here is ladders.
	// In order to fix this we need to follow control flow from these values and insert phi nodes as necessary to link up
	// a set of values where dominance frontiers are shared.

#ifdef PHI_DEBUG
	LOGI("\n=== INSERT PHI FOR %s ===\n", node.block->name.c_str());
#endif

	auto &phi = node.block->ir.phi[node.phi_index];
	auto &incoming_values = phi.incoming;
	UnorderedSet<const CFGNode *> placed_frontiers;

	for (;;)
	{
#ifdef PHI_DEBUG
		LOGI("\n=== PHI iteration ===\n");
		for (auto &incoming : incoming_values)
			LOGI("  Incoming value from %s\n", incoming.block->name.c_str());
#endif

		// Inside the CFG subset, find a dominance frontier where we merge PHIs this iteration.
		CFGNode *frontier = node.block;

		if (!can_complete_phi_insertion(phi, node.block))
		{
			frontier = nullptr;

			// We need some intermediate merge, so find a frontier node to work on.
			for (auto &incoming : incoming_values)
			{
				for (auto *candidate_frontier : incoming.block->dominance_frontier)
				{
					if (placed_frontiers.count(candidate_frontier))
						continue;

					if (!phi_frontier_makes_forward_progress(phi, candidate_frontier, node.block))
					{
						// Make sure we don't redundantly test this again.
						placed_frontiers.insert(candidate_frontier);
						continue;
					}

					// Only consider a frontier if we can reach node.block or its back edge from it.
					if (query_reachability_through_back_edges(*candidate_frontier, *node.block))
					{
						if (frontier == nullptr ||
						    candidate_frontier->forward_post_visit_order > frontier->forward_post_visit_order)
						{
							// Pick the earliest frontier in the CFG.
							// We want to merge top to bottom.
							frontier = candidate_frontier;
						}
					}
				}
			}

			if (frontier)
				placed_frontiers.insert(frontier);
		}

		assert(frontier);

		if (frontier == node.block)
		{
			if (frontier->pred.size() == 1 && !frontier->pred_back_edge)
			{
				// The PHI node has already been merged.
				// This can happen if a ladder pred block merged all inputs, and we would
				// end up with a single-pred PHI, which makes no sense (even if it should work).
				// Just copy the ID for the frontier node which made the final merge.
				auto itr = find_incoming_value(frontier->pred.front(), incoming_values);
				assert(itr != incoming_values.end());
				auto *op = module.allocate_op(spv::OpCopyObject, phi.id, phi.type_id);
				op->add_id(itr->id);
				frontier->pred.front()->ir.operations.push_back(op);

				// Ignore this one when emitting PHIs later.
				phi.id = 0;
			}
			else
			{
				Vector<IncomingValue> final_incoming;

				// Final merge.
				for (auto *input : frontier->pred)
				{
					auto itr = find_incoming_value(input, incoming_values);
					IncomingValue value = {};
					if (itr != incoming_values.end())
						value.id = itr->id;
					else
						value.id = module.get_builder().createUndefined(phi.type_id);
					value.block = input;
					final_incoming.push_back(value);
				}

				if (frontier->pred_back_edge)
				{
					auto itr = find_incoming_value(frontier->pred_back_edge, incoming_values);
					IncomingValue value = {};
					if (itr != incoming_values.end())
						value.id = itr->id;
					else
						value.id = module.get_builder().createUndefined(phi.type_id);
					value.block = frontier->pred_back_edge;
					final_incoming.push_back(value);
				}

				incoming_values = std::move(final_incoming);
			}

			return;
		}

		// A candidate dominance frontier is a place where we might want to place a PHI node in order to merge values.
		// For a successful iteration, we need to find at least one candidate where we can merge PHI.
#ifdef PHI_DEBUG
		LOGI("Testing dominance frontier %s ...\n", frontier->name.c_str());
#endif

		// Remove old inputs.
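		// (Illustrative: if the surviving incoming values live in B1 and B2 and both
		// branch into a shared dominance frontier F, the frontier_phi below merges
		// them in F, and F then stands in as a single incoming value for node.block.)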
		PHI frontier_phi;
		frontier_phi.id = module.allocate_id();
		frontier_phi.type_id = phi.type_id;
		frontier_phi.relaxed = phi.relaxed;
		module.get_builder().addName(frontier_phi.id, (String("frontier_phi_") + frontier->name).c_str());

		assert(!frontier->pred_back_edge);
		for (auto *input : frontier->pred)
		{
			auto itr = find_incoming_value(input, incoming_values);
			if (itr != incoming_values.end())
			{
#ifdef PHI_DEBUG
				auto *incoming_block = itr->block;
				LOGI("  ... found incoming block %s for input %s.\n", incoming_block->name.c_str(),
				     input->name.c_str());
				LOGI("  ... For pred %s (%p), found incoming value from %s (%p)\n", input->name.c_str(),
				     static_cast<const void *>(input), incoming_block->name.c_str(),
				     static_cast<const void *>(incoming_block));
#endif
				IncomingValue value = {};
				value.id = itr->id;
				value.block = input;
				frontier_phi.incoming.push_back(value);
			}
			else
			{
#ifdef PHI_DEBUG
				LOGI("  ... creating undefined input for %s\n", input->name.c_str());
#endif
				// If there is no incoming value, we need to hallucinate an undefined value.
				IncomingValue value = {};
				value.id = module.get_builder().createUndefined(phi.type_id);
				value.block = input;
				frontier_phi.incoming.push_back(value);
			}
		}

		// Do we remove the incoming value now or not?
		// If all paths from the incoming value must go through frontier, we can remove it,
		// otherwise, we might still need to use the incoming value somewhere else.
		size_t num_alive_incoming_values = incoming_values.size();
		for (size_t i = 0; i < num_alive_incoming_values; )
		{
			auto *incoming_block = incoming_values[i].block;

			// This is fundamentally ambiguous and should never happen.
			if (incoming_block == frontier)
				LOGE("Invalid PHI collapse detected!\n");
			assert(incoming_block != frontier);

			if (!exists_path_in_cfg_without_intermediate_node(incoming_block, node.block, frontier))
			{
#ifdef PHI_DEBUG
				LOGI("  ... removing input in %s\n", incoming_block->name.c_str());
#endif
				if (i != num_alive_incoming_values - 1)
					std::swap(incoming_values[num_alive_incoming_values - 1], incoming_values[i]);
				num_alive_incoming_values--;
			}
			else
			{
#ifdef PHI_DEBUG
				LOGI("  ... keeping input in %s\n", incoming_block->name.c_str());
#endif
				i++;
			}
		}

		// Need to clean up exhausted incoming values after the loop,
		// since an incoming value can be used multiple times before a frontier PHI is resolved.
		incoming_values.erase(incoming_values.begin() + num_alive_incoming_values, incoming_values.end());

		IncomingValue *dominated_incoming = nullptr;
		for (auto &incoming : incoming_values)
		{
			if (frontier->dominates(incoming.block) &&
			    !exists_path_in_cfg_without_intermediate_node(frontier, node.block, incoming.block))
			{
				// There should be only one block the frontier can dominate.
				// The candidate block must also post-dominate the frontier on the CFG subset which terminates
				// at node.block, otherwise we will get a proper merge later anyways.
				assert(!dominated_incoming);
				dominated_incoming = &incoming;
			}
		}

		if (dominated_incoming)
		{
			// If our frontier dominates another incoming block, we need to merge two incoming values
			// using an auxiliary phi node as well as an OpSelect to resolve two conflicting values into one.

			// For every pred edge of the frontier where the pred did not dominate, we are now suddenly dominating.
			// If we came from such a block,
			// we should replace the incoming value of dominated_incoming rather than adding a new incoming value.
			PHI merge_phi = {};
			merge_phi.relaxed = phi.relaxed;

			// Here we need to figure out if we have a cross branch which functions as a ladder.
			// If we have such a special edge, the PHI value we find here will override any other value on this path.
			// However, if we only have expected branches, there is nothing to override, and any PHI values
			// we created along this path turned out to be irrelevant after all.
			unsigned normal_branch_count = 0;

			for (auto *input : frontier->pred)
			{
				IncomingValue value = {};
				auto itr = find_incoming_value(input, incoming_values);

				if (itr != incoming_values.end())
				{
					// If the input does not dominate the frontier, this might be a case of cross-edge PHI merge.
					// However, if we still have an incoming value which dominates the input block, ignore.
					// This is considered a normal path and we will merge the actual result in a later iteration, because
					// the frontier is not a post-dominator of the input value.
					bool input_is_normal_edge = true;

					if (!input->dominates(frontier))
					{
						input_is_normal_edge = false;
						for (auto &incoming : incoming_values)
						{
							if (incoming.block->dominates(input))
							{
								input_is_normal_edge = true;
								break;
							}
						}
					}

					if (input_is_normal_edge)
						normal_branch_count++;
					value.id = module.get_builder().makeBoolConstant(input_is_normal_edge);
				}
				else
				{
					// The input is undefined, so we don't really care. Just treat this as a normal edge.
					normal_branch_count++;
					value.id = module.get_builder().makeBoolConstant(true);
				}

				value.block = input;
				merge_phi.incoming.push_back(value);
			}

			if (normal_branch_count != frontier->pred.size())
			{
				merge_phi.id = module.allocate_id();
				merge_phi.type_id = module.get_builder().makeBoolType();

				Operation *op = module.allocate_op(spv::OpSelect, module.allocate_id(), phi.type_id);
				op->add_id(merge_phi.id);
				op->add_id(dominated_incoming->id);
				op->add_id(frontier_phi.id);
				dominated_incoming->block->ir.operations.push_back(op);
				dominated_incoming->id = op->id;

				module.get_builder().addName(merge_phi.id, (String("merged_phi_") + dominated_incoming->block->name).c_str());
				frontier->ir.phi.push_back(std::move(merge_phi));
			}
		}
		else
		{
			// Replace with merged value.
			IncomingValue new_incoming = {};
			new_incoming.id = frontier_phi.id;
			new_incoming.block = frontier;
			incoming_values.push_back(new_incoming);
		}

#ifdef PHI_DEBUG
		LOGI("=========================\n");
#endif

		frontier->ir.phi.push_back(std::move(frontier_phi));
	}
}

void CFGStructurizer::compute_dominance_frontier()
{
	for (auto *node : forward_post_visit_order)
		node->dominance_frontier.clear();
	for (auto *node : forward_post_visit_order)
		recompute_dominance_frontier(node);
}

void CFGStructurizer::compute_post_dominance_frontier()
{
	for (auto *node : backward_post_visit_order)
		node->post_dominance_frontier.clear();
	for (auto *node : backward_post_visit_order)
		recompute_post_dominance_frontier(node);
}

void CFGStructurizer::build_immediate_dominators()
{
	for (auto i = forward_post_visit_order.size(); i; i--)
	{
		auto *block = forward_post_visit_order[i - 1];
		block->recompute_immediate_dominator();
	}
}

void CFGStructurizer::build_immediate_post_dominators()
{
	for (auto i = backward_post_visit_order.size(); i; i--)
	{
		auto *block = backward_post_visit_order[i - 1];
		block->recompute_immediate_post_dominator();
	}
}

void CFGStructurizer::reset_traversal()
{
	reachable_nodes.clear();
	forward_post_visit_order.clear();
	backward_post_visit_order.clear();
	pool.for_each_node([](CFGNode &node) {
		node.visited = false;
		node.backward_visited = false;
		node.traversing = false;
		node.immediate_dominator = nullptr;
		node.immediate_post_dominator = nullptr;
		node.fake_pred.clear();
		node.fake_succ.clear();
		node.headers.clear();

		if (!node.freeze_structured_analysis)
		{
			node.merge = MergeType::None;
			node.loop_merge_block = nullptr;
			node.loop_ladder_block = nullptr;
			node.selection_merge_block = nullptr;
		}

		if (node.succ_back_edge)
			node.succ.push_back(node.succ_back_edge);
		if (node.pred_back_edge)
			node.pred.push_back(node.pred_back_edge);
		node.succ_back_edge = nullptr;
		node.pred_back_edge = nullptr;
	});
}

struct LoopBacktracer
{
	void trace_to_parent(CFGNode *header, CFGNode *block);
	UnorderedSet<CFGNode *> traced_blocks;
};

struct LoopMergeTracer
{
	explicit LoopMergeTracer(const LoopBacktracer &backtracer_)
	    : backtracer(backtracer_)
	{
	}

	void trace_from_parent(CFGNode *header);
	const LoopBacktracer &backtracer;
	Vector<CFGNode *> loop_exits;
	UnorderedSet<CFGNode *> traced_blocks;
};

void LoopBacktracer::trace_to_parent(CFGNode *header, CFGNode *block)
{
	if (block == header)
	{
		traced_blocks.insert(block);
		return;
	}

	if (traced_blocks.count(block) == 0)
	{
		traced_blocks.insert(block);
		for (auto *p : block->pred)
			trace_to_parent(header, p);

		// A backtrace will not pick up continue blocks which only branch back to header,
		// and thus they will be considered loop exits by mistake.
		// Start traversing from the continue block to catch these nodes as well.
		// If a loop header is part of an outer loop construct, the loop body must
		// also be part of the loop construct.
		if (block->pred_back_edge)
			trace_to_parent(header, block->pred_back_edge);
	}
}

void LoopMergeTracer::trace_from_parent(CFGNode *header)
{
	if (backtracer.traced_blocks.count(header) == 0)
	{
		if (std::find(loop_exits.begin(), loop_exits.end(), header) == loop_exits.end())
			loop_exits.push_back(header);
		return;
	}

	for (auto *succ : header->succ)
	{
		if (traced_blocks.count(succ) == 0)
		{
			trace_from_parent(succ);
			traced_blocks.insert(succ);
		}
	}
}

void CFGStructurizer::backwards_visit()
{
	Vector<CFGNode *> leaf_nodes;

	// Traverse from leaf nodes, back through their preds instead.
	// Clear out some state set by forward visit earlier.
	for (auto *node : forward_post_visit_order)
	{
		node->backward_visited = false;
		node->traversing = false;

		// For loops which can only exit from their header block,
		// certain loops will be unreachable when doing a backwards traversal.
		// We'll visit them explicitly later.
		if (node->succ.empty() && !node->succ_back_edge)
			leaf_nodes.push_back(node);
	}

	for (auto *leaf : leaf_nodes)
		backwards_visit(*leaf);

	// It might be the case that some continue blocks are not reachable through backwards traversal.
	// This effectively means that our flipped CFG is not reducible, which is rather annoying.
	// To work around this, we fake some branches from the continue block out to other blocks.
	// This way, we ensure that every forward-reachable block is reachable in a backwards traversal as well.
	// The algorithm works as follows: given the innermost loop header A, a block B (A dom B) and continue block C,
	// for the successors of B, we will observe some successors which can reach C ({E}), and some successors which cannot reach C.
	// C will add fake successor edges to {E}.
	bool need_revisit = false;

	for (size_t i = forward_post_visit_order.size(); i; i--)
	{
		// Resolve outer loops before inner loops since we can have nested loops which need
		// to link into each other.
		auto *node = forward_post_visit_order[i - 1];
		if (node->pred_back_edge)
		{
			if (!node->pred_back_edge->backward_visited)
			{
				LoopBacktracer tracer;
				tracer.trace_to_parent(node, node->pred_back_edge);

				LoopMergeTracer merge_tracer(tracer);
				merge_tracer.trace_from_parent(node);

				// If we have an infinite loop, the continue block will not be reachable with backwards traversal.
				// Also, the only way to exit the loop construct could be through a single return block.
				// In this case, the return block should be moved and considered to be the merge block.
				// We add true branches from the continue block to the return block instead of fake branches.

				// Ensure stable codegen order.
				Vector<CFGNode *> exits;
				exits.reserve(merge_tracer.loop_exits.size());
				for (auto *exit_node : merge_tracer.loop_exits)
					exits.push_back(exit_node);
				std::sort(exits.begin(), exits.end(), [](const CFGNode *a, const CFGNode *b) {
					return a->forward_post_visit_order > b->forward_post_visit_order;
				});

				bool transpose_loop_exit = false;
				if (exits.size() == 1)
				{
					auto *exit_node = exits.front();
					// If this is true, we never really leave the loop, which is problematic.
					transpose_loop_exit = exit_node->dominates_all_reachable_exits();

					// Only transpose if we're the innermost header, otherwise, inner loops which try to branch
					// to the return will be considered a multi-break which is very awkward.
					if (transpose_loop_exit)
					{
						auto *innermost_header = get_innermost_loop_header_for(node, exit_node);
						transpose_loop_exit = innermost_header == node;
					}
				}

				if (transpose_loop_exit)
				{
					for (auto *f : exits)
						node->pred_back_edge->add_branch(f);
				}
				else
				{
					// Only consider exits that are themselves backwards reachable.
					// Otherwise, we'll be adding fake succs that resolve to outer infinite loops again.
					for (auto *f : exits)
						if (f->reaches_backward_visited_node())
							node->pred_back_edge->add_fake_branch(f);
				}

				if (!node->pred_back_edge->succ.empty() || !node->pred_back_edge->fake_succ.empty())
				{
					// Consider this to be backwards visited in case we have a nested inner loop
					// that needs to link up to node->pred_back_edge.
					node->pred_back_edge->backward_visited = true;
				}

				need_revisit = true;
			}
		}
	}

	if (need_revisit)
	{
		for (auto *node : forward_post_visit_order)
		{
			node->backward_visited = false;
			node->traversing = false;
			node->backward_post_visit_order = 0;
		}

		for (auto *leaf : leaf_nodes)
			backwards_visit(*leaf);
	}

	exit_block->backward_post_visit_order = backward_post_visit_order.size();
	exit_block->immediate_post_dominator = exit_block;
	exit_block->backward_visited = true;
	for (auto *leaf : leaf_nodes)
		leaf->immediate_post_dominator = exit_block;
}

void CFGStructurizer::backwards_visit(CFGNode &entry)
{
	entry.backward_visited = true;

	for (auto *pred : entry.pred)
		if (!pred->backward_visited)
			backwards_visit(*pred);

	for (auto *pred : entry.fake_pred)
		if (!pred->backward_visited)
			backwards_visit(*pred);

	entry.backward_post_visit_order = backward_post_visit_order.size();
	backward_post_visit_order.push_back(&entry);
}

void CFGStructurizer::visit_for_back_edge_analysis(CFGNode &entry)
{
	entry.visited = true;
	entry.traversing = true;
	reachable_nodes.insert(&entry);

	for (auto *succ : entry.succ)
	{
		// Reuse the existing vector to keep track of back edges.
		if (succ->traversing)
			succ->fake_pred.push_back(&entry);
		else if (!succ->visited)
			visit_for_back_edge_analysis(*succ);
	}

	entry.traversing = false;

	// After we get here, we must have observed all back edges.
	// If there is more than one back edge, merge them.
	if (entry.fake_pred.size() >= 2)
	{
		auto *new_back_edge = pool.create_node();
		new_back_edge->name = entry.name + ".back-edge-merge";
		for (auto *n : entry.fake_pred)
			n->retarget_branch_pre_traversal(&entry, new_back_edge);
		new_back_edge->succ.push_back(&entry);
		new_back_edge->ir.terminator.type = Terminator::Type::Branch;
		new_back_edge->ir.terminator.direct_block = &entry;
		new_back_edge->add_branch(&entry);
	}
}

void CFGStructurizer::visit(CFGNode &entry)
{
	entry.visited = true;
	entry.traversing = true;
	reachable_nodes.insert(&entry);

	for (auto *succ : entry.succ)
	{
		if (succ->traversing)
		{
			// For now, only support one back edge.
			// DXIL seems to obey this.
			assert(!entry.succ_back_edge || entry.succ_back_edge == succ);
			entry.succ_back_edge = succ;

			// For now, only support one back edge.
			// DXIL seems to obey this.
			assert(!succ->pred_back_edge || succ->pred_back_edge == &entry);
			succ->pred_back_edge = &entry;
		}
		else if (!succ->visited)
			visit(*succ);
	}

	// Any back edges need to be handled specifically, only keep forward edges in succ/pred lists.
	// This avoids any infinite loop scenarios and needing to special case a lot of checks.
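	// E.g., for a simple rotated loop (hypothetical): H -> B, B -> H (back edge),
	// H -> M. After this pass, succ/pred only carry the forward edges H -> B and
	// H -> M, while B.succ_back_edge == H and H.pred_back_edge == B record the cycle.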
	if (entry.succ_back_edge)
	{
		auto itr = std::find(entry.succ.begin(), entry.succ.end(), entry.succ_back_edge);
		if (itr != entry.succ.end())
			entry.succ.erase(itr);
	}

	if (entry.pred_back_edge)
	{
		auto itr = std::find(entry.pred.begin(), entry.pred.end(), entry.pred_back_edge);
		if (itr != entry.pred.end())
			entry.pred.erase(itr);
	}

	entry.traversing = false;
	entry.forward_post_visit_order = forward_post_visit_order.size();
	forward_post_visit_order.push_back(&entry);
}

void CFGStructurizer::merge_to_succ(CFGNode *node, unsigned index)
{
	node->succ[index]->headers.push_back(node);
	node->selection_merge_block = node->succ[index];
	node->merge = MergeType::Selection;
	//LOGI("Fixup selection merge %s -> %s\n", node->name.c_str(), node->selection_merge_block->name.c_str());
}

void CFGStructurizer::isolate_structured(UnorderedSet<CFGNode *> &nodes, const CFGNode *header, const CFGNode *merge)
{
	for (auto *pred : merge->pred)
	{
		if (pred != header && nodes.count(pred) == 0)
		{
			nodes.insert(pred);
			isolate_structured(nodes, header, pred);
		}
	}
}

Vector<CFGNode *> CFGStructurizer::isolate_structured_sorted(const CFGNode *header, const CFGNode *merge)
{
	UnorderedSet<CFGNode *> nodes;
	isolate_structured(nodes, header, merge);

	Vector<CFGNode *> sorted;
	sorted.reserve(nodes.size());
	for (auto *node : nodes)
		sorted.push_back(node);

	std::sort(sorted.begin(), sorted.end(), [](const CFGNode *a, const CFGNode *b) {
		return a->forward_post_visit_order > b->forward_post_visit_order;
	});
	return sorted;
}

bool CFGStructurizer::block_is_load_bearing(const CFGNode *node, const CFGNode *merge) const
{
	while (merge->succ.size() == 1)
	{
		// If we're going to eliminate a block due to impossible merge,
		// we should look ahead since we might get a false positive.
		bool breaking = merge_candidate_is_on_breaking_path(merge);
		if (breaking && !merge->ir.operations.empty() && !block_is_control_dependent(merge))
			merge = merge->succ.front();
		else
			break;
	}

	return node->pred.size() >= 2 && !exists_path_in_cfg_without_intermediate_node(node->immediate_dominator, merge, node);
}

bool CFGStructurizer::control_flow_is_escaping_from_loop(const CFGNode *node, const CFGNode *merge) const
{
	bool escaping_path = false;

	if (node == merge)
		return escaping_path;

	assert(merge->post_dominates(node));

	// First, test the loop scenario.
	// If we're inside a loop, we're a break construct if we can prove that:
	// - node has a loop header which dominates it.
	// - node cannot reach the continue block.
	// - Continue block cannot reach node.
	// - All post-domination frontiers can reach the continue block, meaning that at some point control flow
	//   decided to break out of the loop construct.
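	// As an illustration (hypothetical blocks): loop H { B -> { C, node } } with
	// continue block C -> H, node -> M, and M also reachable from the loop's normal
	// exit. In the forward CFG (back edges stripped), node and C cannot reach each
	// other, node does not dominate M, and node's post-dominance frontier B can
	// still reach C, so node is classified as a break block.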
	auto *innermost_loop_header = get_innermost_loop_header_for(node);
	if (innermost_loop_header && innermost_loop_header->pred_back_edge)
	{
		bool dominates_merge = node->dominates(merge);
		bool can_reach_continue = query_reachability(*node, *innermost_loop_header->pred_back_edge);
		bool continue_can_reach = query_reachability(*innermost_loop_header->pred_back_edge, *node);
		bool pdf_can_reach_continue = true;

		for (auto *frontier : node->post_dominance_frontier)
		{
			bool header_dominates_frontier = innermost_loop_header->dominates(frontier);
			bool frontier_is_inside_loop_construct = query_reachability(*frontier, *innermost_loop_header->pred_back_edge);
			if (!header_dominates_frontier || !frontier_is_inside_loop_construct)
			{
				pdf_can_reach_continue = false;
				break;
			}
		}

		if (!dominates_merge && !continue_can_reach && !can_reach_continue && pdf_can_reach_continue)
			escaping_path = true;
	}

	return escaping_path;
}

bool CFGStructurizer::control_flow_is_escaping(const CFGNode *node, const CFGNode *merge) const
{
	if (node == merge)
		return false;

	if (control_flow_is_escaping_from_loop(node, merge))
		return true;

	// Try to test if our block is load bearing, in which case it cannot be considered a break block.
	// If the only path from idom to merge goes through node, it must be considered load bearing,
	// since removing break paths must not change reachability.
	if (block_is_load_bearing(node, merge))
		return false;

	// If we have two different switch blocks in our PDF frontier, something ridiculous is happening
	// where we effectively have one switch block falling through to another switch block (?!?!?!).
	// Definitely needs to be split up.
	unsigned switch_pdf_frontiers = 0;
	for (auto *frontier : node->post_dominance_frontier)
		if (frontier->ir.terminator.type == Terminator::Type::Switch)
			switch_pdf_frontiers++;
	if (switch_pdf_frontiers >= 2)
		return true;

	// If we cannot prove the escape through loop analysis, we might be able to deduce it from domination frontiers.
	// If control flow is not escaping, then there must exist a dominance frontier node A,
	// where merge strictly post-dominates A.
	// This means that control flow can merge somewhere before we hit the merge block, and we consider that
	// normal structured control flow.
	bool escaping_path = !node->reaches_domination_frontier_before_merge(merge);

	// This is a strong check.
	// If node directly branches to merge, but PDF does not,
	// we have detected a control flow pattern which is clearly a break.
	// The PDF candidate must dominate node for this check to be meaningful.
	if (escaping_path)
	{
		for (auto *frontier : node->post_dominance_frontier)
			if (frontier->dominates(node) && frontier->reaches_domination_frontier_before_merge(merge))
				return true;

		// Strong check as well.
		// If branching directly to continue block like this, this is a non-merging continue,
		// which we should always consider an escape.
		if (node->succ.size() == 1 && node->succ.front()->succ_back_edge)
			return true;
	}

	if (escaping_path && node->ir.operations.empty() && node->ir.phi.empty())
	{
		// If we post-dominate nothing useful or do nothing useful ourselves,
		// this is a good indication we're a common escape edge ladder block.
		// This can happen if we have a graph of:
		// A -> B
		// A -> C
		// B -> merge
		// C -> merge
		// B -> node
		// C -> node
		// node -> merge
		// This super jank diamond pattern will break the heuristics.
		// If we only post dominate work from one pred, we're not meaningfully merging anything,
		// so it should be safe to elide.
		if (node->count_post_dominates_work_from_incoming_preds() <= 1)
			return true;
	}

	if (escaping_path && node->pred.size() >= 2)
	{
		// We also need to consider false positives here, which are mostly only relevant for merge candidates.
		// One case would be selection construct A, which terminates in block B. B then branches to C.
		// Earlier in the A -> B construct, there might be a break block D which also branches to B.
		// This means that C will be a "false" domination frontier of B and our analysis above is wrong.
		// The algorithm here:
		// - Get idom of node, which represents the header. For this analysis, we're only interested
		//   in code paths which are dominated by idom.
		// - Find all preds of merge which are dominated by idom(node).
		// - Backtrace every pred P until they can reach B, or B can reach P.
		// - If B has strictly lowest post-visit order, we are not escaping. P was.
		auto *idom = node->immediate_dominator;
		bool found_false_positive = false;

		for (auto *pred : merge->pred)
		{
			// Don't care about these.
			if (!idom->dominates(pred))
				continue;

			while (pred != node && !query_reachability(*pred, *node) && !query_reachability(*node, *pred))
				pred = pred->immediate_dominator;

			// Ignore these.
			if (pred == node)
				continue;

			if (query_reachability(*pred, *node))
			{
				// Seems good. Keep going. If we don't find a counter example, we'll accept this as a false positive.
				found_false_positive = true;
			}
			else
			{
				// Indeed, this is an escape.
				found_false_positive = false;
				break;
			}
		}

		escaping_path = !found_false_positive;
	}

	return escaping_path;
}

bool CFGStructurizer::block_is_plain_continue(const CFGNode *node)
{
	return node->succ_back_edge != nullptr && node != node->succ_back_edge;
}

const CFGNode *CFGStructurizer::scan_plain_continue_block(const CFGNode *node)
{
	auto *base_node = node;
	while (!block_is_plain_continue(node) && base_node->dominates(node) &&
	       !node->succ_back_edge && !node->pred_back_edge &&
	       node->immediate_post_dominator && node->immediate_post_dominator != node)
	{
		node = node->immediate_post_dominator;
	}
	return node;
}

bool CFGStructurizer::selection_requires_structured_header(const CFGNode *node) const
{
	// From SPIR-V spec. SelectionMerge is required for:
	// ... an OpBranchConditional instruction that has different
	// True Label and False Label operands where neither are declared merge blocks or Continue Targets.

	// Ensure that there is a real merge block.
	// Only safe to do this in pass1, since we're not supposed to rewrite control flow there.
	// In first passes, it's okay to merge in the wrong direction.

	// Only consider normal selection merges. Switch and loop exits are stronger than selection exits,
	// so we don't need to apply special cases.
	// This consideration is purely to avoid excessive deltas in shader outputs, and having merge
	// blocks makes SPIRV-Cross output a little more readable.
	assert(node->succ.size() == 2 && !node->succ_back_edge);

	// We can use proper merge blocks if both paths converge to same location.
	// If we have a direct branch to continue block on one path,
	// we can use merge blocks in the opposing path just fine.
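	// E.g. (hypothetical): for `if (c) { A; } B;` the two succs are A and B, and A
	// can reach B, so the check below fires and a structured header is required.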
	for (int i = 0; i < 2; i++)
		if (query_reachability(*node->succ[i], *node->succ[1 - i]) || block_is_plain_continue(node->succ[i]))
			return true;

	for (int i = 0; i < 2; i++)
	{
		auto *s = node->succ[i];
		bool succ_is_plain_selection_merge =
		    std::find_if(s->headers.begin(), s->headers.end(), [&](const CFGNode *head) {
			    return head->ir.terminator.type != Terminator::Type::Switch &&
			           head->merge == MergeType::Selection &&
			           head->selection_merge_block == s;
		    }) != s->headers.end();

		if (succ_is_plain_selection_merge)
			return false;
	}

	return true;
}

void CFGStructurizer::fixup_broken_selection_merges(unsigned pass)
{
	// Here we deal with selection branches where one path breaks and one path merges.
	// This is a common case for ladder blocks where we need to merge to the "true" merge block.
	// The selection header has two succs, but the merge block might only have one pred block,
	// which means it was not considered a merge candidate earlier in find_selection_merges().
	for (auto *node : forward_post_visit_order)
	{
		if (node->succ.size() != 2)
			continue;
		if (node->merge != MergeType::None)
			continue;

		// A continue block will never need to merge execution, but it shouldn't have succ.size() == 2,
		// but rather succ.size() == 1 and a back edge.
		if (node->succ_back_edge)
			continue;

		bool dominates_a = node->dominates(node->succ[0]);
		bool dominates_b = node->dominates(node->succ[1]);

		// Continue blocks should also be considered to have a header already. Makes sure we don't merge to them.
		bool merge_a_has_header = !node->succ[0]->headers.empty() || block_is_plain_continue(node->succ[0]);
		bool merge_b_has_header = !node->succ[1]->headers.empty() || block_is_plain_continue(node->succ[1]);

		if (pass == 1 && !selection_requires_structured_header(node))
			continue;

		int trivial_merge_index = -1;

		// Only allow the obvious merge candidates in pass 1.
		// In pass 0, we might have a clear merge candidate,
		// but the other path might be an escaping edge, which needs to be considered.
		if (dominates_a && !dominates_b && !merge_a_has_header)
		{
			// A is an obvious candidate. B is a direct break/continue construct target most likely.
			merge_to_succ(node, 0);
			trivial_merge_index = 0;
		}
		else if (dominates_b && !dominates_a && !merge_b_has_header)
		{
			// B is an obvious candidate. A is a direct break/continue construct target most likely.
			merge_to_succ(node, 1);
			trivial_merge_index = 1;
		}
		else if (dominates_a && dominates_b && !merge_a_has_header && merge_b_has_header)
		{
			// Not as obvious of a candidate, but this can happen if one path hits a continue block,
			// and the other path hits a ladder merge block.
			// For a do/while(false) style loop, the loop body may dominate the merge block.
			merge_to_succ(node, 0);
			trivial_merge_index = 0;
		}
		else if (dominates_a && dominates_b && !merge_b_has_header && merge_a_has_header)
		{
			// Not as obvious of a candidate, but this can happen if one path hits a continue block,
			// and the other path hits a ladder merge block.
			// For a do/while style loop, the loop body may dominate the merge block.
			merge_to_succ(node, 1);
			trivial_merge_index = 1;
		}
		else if (dominates_a && dominates_b && !merge_a_has_header && !merge_b_has_header)
		{
			// We could merge to both, no obvious merge point.
			// Figure out where execution reconvenes.
			// If we have a "break"-like construct inside a selection construct, we will not end up dominating the merge block.
			// This will be fixed up with ladder constructs later in first pass.
			// In second pass, we will have redirected any branches which escape through a ladder block.
			// If we find that one path of the selection construct must go through that ladder block,
			// we know we have a break construct.
			CFGNode *merge = CFGStructurizer::find_common_post_dominator(node->succ);
			if (merge)
			{
				bool dominates_merge = node->dominates(merge);
				bool merges_to_continue = block_is_plain_continue(merge);

				// Here we have a likely case where one block is doing a clean "break" out of a loop, and
				// the other path continues as normal, and then conditionally breaks in a continue block or something similar.
				bool ambiguous_merge_case = !merges_to_continue && dominates_merge && !merge->headers.empty();

				// Happens first iteration. We'll have to split blocks, so register a merge target where we want it.
				// Otherwise, this is the easy case if we observe it in pass 1.
				// This shouldn't really happen though, as we'd normally resolve this earlier in find_selection_merges.
				bool mark_merge_block_case = !merges_to_continue && (merge->headers.empty() || pass == 0);

				// Another scenario is that we don't dominate the merge block in pass 1. We cannot split blocks now.
				// Check to see which paths can actually reach the merge target without going through a ladder block.
				// If we don't go through a ladder it means an outer scope will actually reach the merge node.
				// If we reach a ladder it means a block we dominate will make the escape.
				// If we're in pass 1 and we still don't dominate our merge target, consider it ambiguous.
				if (pass == 1 && !dominates_merge)
					ambiguous_merge_case = true;

				// Another case is when one path is "breaking" out to a continue block which we don't dominate.
				// We should not attempt to do ladder breaking here in pass 0 since it's unnecessary.
				bool tie_break_merge = ambiguous_merge_case || !mark_merge_block_case;

				bool a_path_is_break = control_flow_is_escaping(node->succ[0], merge);
				bool a_path_is_continue = block_is_plain_continue(scan_plain_continue_block(node->succ[0]));
				bool b_path_is_break = control_flow_is_escaping(node->succ[1], merge);
				bool b_path_is_continue = block_is_plain_continue(scan_plain_continue_block(node->succ[1]));
				bool a_path_is_break_or_continue = a_path_is_break || a_path_is_continue;
				bool b_path_is_break_or_continue = b_path_is_break || b_path_is_continue;

				// Continue is stronger than break. A breaking path may still need to merge control flow,
				// especially if that breaking path is very complicated. If we detect continue, the back-edge
				// post-dominates our succ, so we are guaranteed to never need to merge control flow on that path.
				// Demote the other path to a non-breaking path.
				if (a_path_is_continue != b_path_is_continue)
				{
					tie_break_merge = true;
					if (a_path_is_continue)
						b_path_is_break_or_continue = false;
					else
						a_path_is_break_or_continue = false;
				}

				if (tie_break_merge)
				{
					if (a_path_is_break_or_continue && b_path_is_break_or_continue)
					{
						// Both paths break, so we don't need to merge anything. Use Unreachable merge target.
						node->merge = MergeType::Selection;
						node->selection_merge_block = nullptr;
						//LOGI("Merging %s -> Unreachable\n", node->name.c_str());
					}
					else if (b_path_is_break_or_continue)
						merge_to_succ(node, 0);
					else if (a_path_is_break_or_continue)
						merge_to_succ(node, 1);
					else
					{
						// Need more interesting tie-breaking.
						// We can deduce which path is breaking or not based on the dominance frontier.
						// If a dominance frontier for A can reach B, then we assume that B is breaking further than A
						// is, so we should merge to A.
						// The breaking path for B will likely need to ensure that the selection header can
						// support such a break.
						// If we hit this path, the common post-dominator will not find the intended merge
						// target for B, so we never get to perform the necessary fixup.
						auto *a_front = node->succ[0]->dominance_frontier.size() == 1 ?
						                node->succ[0]->dominance_frontier.front() : nullptr;
						auto *b_front = node->succ[1]->dominance_frontier.size() == 1 ?
						                node->succ[1]->dominance_frontier.front() : nullptr;

						bool found_candidate = false;
						CFGNode *inner_merge_candidate = nullptr;

						// If there is no unique dominance frontier for one path, pick the one that has a unique frontier,
						// as that is considered a merge.
						if ((a_front || b_front) && a_front != b_front)
						{
							if (!b_front || (a_front && query_reachability(*a_front, *b_front)))
							{
								merge_to_succ(node, 0);
								inner_merge_candidate = b_front;
								found_candidate = true;
							}
							else if (!a_front || (b_front && query_reachability(*b_front, *a_front)))
							{
								merge_to_succ(node, 1);
								inner_merge_candidate = a_front;
								found_candidate = true;
							}
						}

						if (!found_candidate)
						{
							node->merge = MergeType::Selection;
							node->selection_merge_block = nullptr;

							if (a_front && b_front && a_front->headers.size() == 1 && b_front->headers.size() == 1)
							{
								// Extremely ambiguous merge where the selection construct can merge to two different paths.
								// Our only option at this point is to pick an arbitrary winner
								// and consider one path the breaking one arbitrarily.
								auto *a_header = a_front->headers.front();
								auto *b_header = b_front->headers.front();

								// Pick the largest enclosing header as a heuristic.
								inner_merge_candidate = a_header->forward_post_visit_order > b_header->forward_post_visit_order ?
								                        a_front : b_front;
							}
						}

						if (inner_merge_candidate && inner_merge_candidate->headers.size() == 1)
						{
							// The breaking path tries to break to this node.
							// This will only trigger in pass 1.
							auto *header = inner_merge_candidate->headers.front();
							if (header->merge == MergeType::Selection)
							{
								// Promote to loop header instead.
								// We might have to enter the loop ladder fixup stages later
								// to insert ladders as required.
								header->merge = MergeType::Loop;
								header->loop_merge_block = header->selection_merge_block;
								header->selection_merge_block = nullptr;
								header->freeze_structured_analysis = true;
							}
						}
					}
				}
				else
				{
					assert(merge);
					node->selection_merge_block = merge;
					node->merge = MergeType::Selection;
					merge->headers.push_back(node);
					//LOGI("Merging %s -> %s\n", node->name.c_str(), node->selection_merge_block->name.c_str());
				}
			}
			else
			{
				// We likely had one side of the branch take an "exit", in which case there is no common post-dominator.
				bool a_dominates_exit = node->succ[0]->dominates_all_reachable_exits();
				bool b_dominates_exit = node->succ[1]->dominates_all_reachable_exits();
				if (!a_dominates_exit && b_dominates_exit)
					merge_to_succ(node, 0);
				else if (!b_dominates_exit && a_dominates_exit)
					merge_to_succ(node, 1);
				else
				{
					// Both paths lead to exit. Do we even need to merge here?
					// In worst case we can always merge to an unreachable node in the CFG.
					node->merge = MergeType::Selection;
					node->selection_merge_block = nullptr;

					const auto node_is_degenerate_merge_block = [](const CFGNode *n) {
						return n->ir.terminator.type == Terminator::Type::Unreachable ||
						       (n->ir.terminator.type == Terminator::Type::Return && n->ir.operations.empty());
					};

					// In some cases however, we have to try even harder to tie-break these blocks,
					// since post-domination analysis may break due to early exit blocks.
					// Use principle of least break to tie-break.
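					// "Least break" (illustrative): prefer merging towards the succ whose
					// unique dominance frontier can reach the other's, i.e. stay in the
					// innermost construct; the succ counts and idoms below are fallbacks
					// when neither frontier reaches the other.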
					if (node->succ[0]->dominance_frontier.size() == 1 && node->succ[1]->dominance_frontier.size() == 1)
					{
						auto *a_frontier = node->succ[0]->dominance_frontier.front();
						auto *b_frontier = node->succ[1]->dominance_frontier.front();

						if (a_frontier != b_frontier)
						{
							// Try to merge in the direction of early returns, since the other direction
							// will likely result in a loop break or something like that.
							// Inner constructs tend to use weaker selection merges, which means we need
							// to merge in that direction to stay valid.
							if (query_reachability(*a_frontier, *b_frontier))
								merge_to_succ(node, 0);
							else if (query_reachability(*b_frontier, *a_frontier))
								merge_to_succ(node, 1);
							else
							{
								auto a_succ_count = a_frontier->succ.size();
								auto b_succ_count = b_frontier->succ.size();

								// First look at the idoms. This can give us an idea how the code is nested.
								// Merge towards innermost idom.
								// If that fails, merge against early returns as a last resort.
								a_frontier = a_frontier->immediate_dominator;
								b_frontier = b_frontier->immediate_dominator;

								if (a_frontier != b_frontier && query_reachability(*a_frontier, *b_frontier))
									merge_to_succ(node, 1);
								else if (a_frontier != b_frontier && query_reachability(*b_frontier, *a_frontier))
									merge_to_succ(node, 0);
								else if (a_succ_count == 0 && b_succ_count != 0)
									merge_to_succ(node, 0);
								else if (b_succ_count == 0 && a_succ_count != 0)
									merge_to_succ(node, 1);
							}
						}
					}
					else if (node_is_degenerate_merge_block(node->succ[1]) && !node_is_degenerate_merge_block(node->succ[0]))
					{
						// Try to merge away from blank returns.
						merge_to_succ(node, 0);
					}
					else if (node_is_degenerate_merge_block(node->succ[0]) && !node_is_degenerate_merge_block(node->succ[1]))
					{
						// Try to merge away from blank returns.
						merge_to_succ(node, 1);
					}
				}
			}
		}
		else if (pass == 0)
		{
			// No possible merge target. Just need to pick whatever node is the merge block here.
			// Only do this in first pass, so that we can get a proper ladder breaking mechanism in place if we are escaping.
			CFGNode *merge = CFGStructurizer::find_common_post_dominator(node->succ);
			if (merge)
			{
				// Don't try to merge to our switch block.
				auto *inner_header = node->get_outer_header_dominator();
				bool conditional_switch_break = inner_header &&
				                                inner_header->merge == MergeType::Selection &&
				                                inner_header->selection_merge_block == merge;

				if (!conditional_switch_break)
				{
					node->selection_merge_block = merge;
					node->merge = MergeType::Selection;
					merge->headers.push_back(node);
					//LOGI("Merging %s -> %s\n", node->name.c_str(), node->selection_merge_block->name.c_str());
				}
			}
			else
			{
				//LOGI("Cannot find a merge target for block %s ...\n", node->name.c_str());
			}
		}

		if (trivial_merge_index >= 0 && pass == 0)
		{
			CFGNode *merge = CFGStructurizer::find_common_post_dominator(node->succ);
			if (merge && !node->dominates(merge) && !block_is_plain_continue(merge))
			{
				if (!merge->headers.empty())
				{
					// We might have a trivial merge, yet the other branch direction
					// is a breaking construct. We will have to split some blocks.
					merge->headers.push_back(node);
				}

				auto *current_candidate = node->succ[trivial_merge_index];
				auto *other_candidate = node->succ[1 - trivial_merge_index];

				bool current_escapes = current_candidate == merge || control_flow_is_escaping(current_candidate, merge);

				// It's possible that our other candidate is a merge target. If we don't dominate the candidate,
				// it means it's on the dominance frontier and we should not consider it escaping.

				// Trivial heuristic for escape.
				bool other_escapes = other_candidate == merge || block_is_plain_continue(other_candidate);

				// Second level heuristic.
				if (!other_escapes && control_flow_is_escaping(other_candidate, merge))
				{
					// Final layer of hell.
					if (node->dominates(other_candidate))
					{
						// There is no frontier, so we accept escape analysis as-is.
						other_escapes = true;
					}
					else
					{
						// This is a frontier, so it shouldn't be considered an escape,
						// but if this is a "weak" frontier, we can avoid creating a dummy interim block.
						// If the other candidate is a loop merge, then we will resolve the merge in another way,
						// which will make the interim block superfluous.
						bool other_is_loop_merge_candidate =
						    other_candidate->headers.size() == 1 &&
						    other_candidate->headers.front()->merge == MergeType::Loop &&
						    (other_candidate->headers.front()->loop_merge_block == other_candidate ||
						     other_candidate->headers.front()->loop_ladder_block == other_candidate);
						other_escapes = other_is_loop_merge_candidate;
					}
				}

				if (!current_escapes && !other_escapes)
				{
					// Neither is considered an escape. This is strange and should not happen unless we have
					// a fake frontier block to contend with.
					// Attempt to tie-break by observing if the current candidate has a direct branch to merge,
					// but the other does not.
					if (std::find(current_candidate->succ.begin(), current_candidate->succ.end(), merge) !=
					        current_candidate->succ.end() &&
					    std::find(other_candidate->succ.begin(), other_candidate->succ.end(), merge) ==
					        other_candidate->succ.end())
					{
						current_escapes = true;

						// If the current candidate's frontier can reach the other candidate directly,
						// this is a final tie-break to show that we should accept the current situation.
						for (auto *frontier : current_candidate->dominance_frontier)
						{
							if (frontier != other_candidate && query_reachability(*frontier, *other_candidate))
							{
								current_escapes = false;
								break;
							}
						}
					}
				}

				// If we tried to merge in a direction which is a breaking construct,
				// this means that the other path is the actual desired break path.
				if (current_escapes && !other_escapes)
				{
					auto *target_block = node->succ[1 - trivial_merge_index];

					// We kinda want to merge the other way, but to do that, we need an interim block.
					auto *ladder = pool.create_node();
					ladder->name = node->name + "." + target_block->name + ".interim";
					ladder->add_branch(target_block);
					ladder->ir.terminator.type = Terminator::Type::Branch;
					ladder->ir.terminator.direct_block = target_block;
					ladder->immediate_dominator = node;
					ladder->immediate_post_dominator = target_block;
					ladder->dominance_frontier.push_back(target_block);
					ladder->forward_post_visit_order = node->forward_post_visit_order;
					ladder->backward_post_visit_order = node->backward_post_visit_order;
					node->retarget_branch(target_block, ladder);
					node->selection_merge_block = ladder;
				}
			}
		}
	}
}

void CFGStructurizer::rewrite_selection_breaks(CFGNode *header, CFGNode *ladder_to)
{
	// Don't rewrite loops here (since this is likely a loop merge block),
	// unless we're rewriting a header -> inner construct scenario.
	// Check if the ladder_to block has a path to the continue block.
	// If it does, it is part of the loop construct, and cannot be a loop merge block.
	if (header->pred_back_edge && !header->pred_back_edge->can_backtrace_to(ladder_to))
		return;

	// Don't rewrite switch blocks either.
	if (header->ir.terminator.type == Terminator::Type::Switch)
		return;

	//LOGI("Rewriting selection breaks %s -> %s\n", header->name.c_str(), ladder_to->name.c_str());

	UnorderedSet<CFGNode *> construct;

	// Be careful about rewriting branches in continuing constructs.
	CFGNode *inner_continue_block = nullptr;
	CFGNode *inner_continue_succ = nullptr;
	bool ladder_to_dominates_continue = false;
	bool break_post_dominates_ladder_to = false;

	auto *innermost_loop_header = get_innermost_loop_header_for(header);
	if (innermost_loop_header && innermost_loop_header->pred_back_edge)
		inner_continue_block = innermost_loop_header->pred_back_edge;

	if (inner_continue_block && inner_continue_block->succ.size() == 1)
	{
		inner_continue_succ = inner_continue_block->succ.front();
		break_post_dominates_ladder_to = inner_continue_succ->post_dominates(ladder_to);
		ladder_to_dominates_continue = ladder_to->dominates(inner_continue_block);
	}

	header->traverse_dominated_blocks([&](CFGNode *node) -> bool {
		// Inner loop headers are not candidates for a rewrite. They are split in split_merge_blocks.
		// Similar with switch blocks.
		// Also, we need to stop traversing when we hit the target block ladder_to.
		if (node != ladder_to)
		{
			if (!query_reachability(*node, *ladder_to))
				return false;

			bool branch_is_loop_or_switch = node->pred_back_edge || node->ir.terminator.type == Terminator::Type::Switch;

			// If our candidate scope splits a loop scope in half, ignore this candidate.
			if (break_post_dominates_ladder_to && !ladder_to_dominates_continue && node->dominates(inner_continue_block))
			{
				return false;
			}

			if (node->succ.size() >= 2 && !branch_is_loop_or_switch)
			{
				auto *outer_header = get_post_dominance_frontier_with_cfg_subset_that_reaches(node, ladder_to, nullptr);
				if (outer_header == header)
					construct.insert(node);
			}
			return true;
		}
		else
			return false;
	});

	Vector<CFGNode *> sorted_construct;
	sorted_construct.reserve(construct.size());
	for (auto *inner_block : construct)
		sorted_construct.push_back(inner_block);

	// Emit inner constructs before outer constructs.
	// This way we get natural nesting in case of certain if/else if ladders.
	std::sort(sorted_construct.begin(), sorted_construct.end(), [](const CFGNode *a, const CFGNode *b) {
		return a->forward_post_visit_order < b->forward_post_visit_order;
	});

	for (auto *inner_block : sorted_construct)
	{
		//LOGI("Header: %s, Inner: %s.\n", header->name.c_str(), inner_block->name.c_str());
		auto *ladder = pool.create_node();
		ladder->name = ladder_to->name + "." + inner_block->name + ".ladder";
		//LOGI("Walking dominated blocks of %s, rewrite branches %s -> %s.\n", inner_block->name.c_str(),
		//     ladder_to->name.c_str(), ladder->name.c_str());
		ladder->add_branch(ladder_to);
		ladder->ir.terminator.type = Terminator::Type::Branch;
		ladder->ir.terminator.direct_block = ladder_to;
		ladder->immediate_post_dominator = ladder_to;
		ladder->dominance_frontier.push_back(ladder_to);
		ladder->forward_post_visit_order = ladder_to->forward_post_visit_order;
		ladder->backward_post_visit_order = ladder_to->backward_post_visit_order;

		// Stop rewriting once we hit a merge block.
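		// Sketch of the rewrite (hypothetical names): branches inside inner_block's scope
		// that previously targeted ladder_to are redirected through the fresh block:
		//   before: inner_block -> ... -> ladder_to
		//   after:  inner_block -> ... -> ladder_to.inner_block.ladder -> ladder_to
		// giving each nesting level its own unique break target.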
		traverse_dominated_blocks_and_rewrite_branch(inner_block, ladder_to, ladder,
		                                             [inner_block](CFGNode *node) -> bool {
			                                             return inner_block->selection_merge_block != node;
		                                             }, {});
		ladder->recompute_immediate_dominator();
		rewrite_selection_breaks(inner_block, ladder);
	}
}

bool CFGStructurizer::is_strictly_dominance_ordered(const CFGNode *a, const CFGNode *b, const CFGNode *c)
{
	return a != b && a->dominates(b) && b != c && b->dominates(c);
}

bool CFGStructurizer::is_reachability_ordered(const CFGNode *a, const CFGNode *b, const CFGNode *c)
{
	return a != b && query_reachability(*a, *b) && b != c && query_reachability(*b, *c);
}

bool CFGStructurizer::header_and_merge_block_have_entry_exit_relationship(const CFGNode *header, const CFGNode *merge) const
{
	if (!merge->post_dominates(header))
		return false;

	// If there are other blocks which need merging, and that idom is the header,
	// then header is some kind of exit block.
	bool found_inner_merge_target = false;
	const CFGNode *potential_inner_merge_target = nullptr;

	const auto is_earlier = [](const CFGNode *candidate, const CFGNode *existing) {
		return !existing || (candidate->forward_post_visit_order > existing->forward_post_visit_order);
	};

	const auto is_later = [](const CFGNode *candidate, const CFGNode *existing) {
		return !existing || (candidate->forward_post_visit_order < existing->forward_post_visit_order);
	};

	header->traverse_dominated_blocks([&](const CFGNode *node) {
		if (node == merge)
			return false;

		// Don't analyze loops, this path is mostly for selections only.
		if (node->pred_back_edge)
			return false;

		if (node->num_forward_preds() <= 1)
			return true;

		auto *idom = node->immediate_dominator;
		if (idom == header)
		{
			found_inner_merge_target = true;
			return false;
		}
		else if (is_later(node, potential_inner_merge_target) &&
		         idom->immediate_post_dominator == merge &&
		         !exists_path_in_cfg_without_intermediate_node(header, node, idom))
		{
			// Need to analyze this further to determine if it's one of those insane crossing merge cases ...
			// Find the lowest post visit order if there are multiple candidates.
			potential_inner_merge_target = node;
		}

		return true;
	});

	if (found_inner_merge_target)
		return true;
	if (!potential_inner_merge_target)
		return false;

	// Alternatively, try to find a situation where the natural merge is difficult to determine.
	// In this scenario, selection constructs appear to be "breaking" in different directions.
	// Any attempt to split scopes here will fail spectacularly.
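	// Illustrative crossing pattern (hypothetical blocks): breaks towards the inner
	// target I and the outer merge M interleave in dominance order, e.g.
	//   b0 (-> I) dom b1 (-> M) dom b2 (-> I),
	// so neither break scope nests cleanly inside the other; the first/last bookkeeping
	// below detects exactly this ordering.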
	const CFGNode *first_natural_breaks_to_outer = nullptr;
	const CFGNode *first_natural_breaks_to_inner = nullptr;
	const CFGNode *last_natural_breaks_to_outer = nullptr;
	const CFGNode *last_natural_breaks_to_inner = nullptr;

	header->traverse_dominated_blocks([&](const CFGNode *node) {
		if (node == merge || node == potential_inner_merge_target)
			return false;
		if (!query_reachability(*node, *merge) || !query_reachability(*node, *potential_inner_merge_target))
			return false;
		if (node->succ.size() < 2)
			return true;

		bool breaks_to_outer = std::find_if(node->succ.begin(), node->succ.end(), [&](const CFGNode *candidate) {
			return merge->post_dominates(candidate);
		}) != node->succ.end();

		bool breaks_to_inner = std::find_if(node->succ.begin(), node->succ.end(), [&](const CFGNode *candidate) {
			return potential_inner_merge_target->post_dominates(candidate);
		}) != node->succ.end();

		if (breaks_to_inner)
			breaks_to_outer = false;

		if (breaks_to_outer)
		{
			if (is_earlier(node, first_natural_breaks_to_outer))
				first_natural_breaks_to_outer = node;
			if (is_later(node, last_natural_breaks_to_outer))
				last_natural_breaks_to_outer = node;
		}

		if (breaks_to_inner)
		{
			if (is_earlier(node, first_natural_breaks_to_inner))
				first_natural_breaks_to_inner = node;
			if (is_later(node, last_natural_breaks_to_inner))
				last_natural_breaks_to_inner = node;
		}

		return true;
	});

	if (!first_natural_breaks_to_outer || !first_natural_breaks_to_inner ||
	    !last_natural_breaks_to_outer || !last_natural_breaks_to_inner)
	{
		return false;
	}

	// Crossing break scenario.
	if (is_strictly_dominance_ordered(first_natural_breaks_to_inner, first_natural_breaks_to_outer, last_natural_breaks_to_inner))
		return true;
	else if (is_strictly_dominance_ordered(first_natural_breaks_to_outer, first_natural_breaks_to_inner, last_natural_breaks_to_outer))
		return true;
	else
		return false;
}

bool CFGStructurizer::serialize_interleaved_early_returns()
{
	for (auto *node : forward_post_visit_order)
	{
		if (node->num_forward_preds() <= 1)
			continue;

		// Never merge to a continue block.
		// We should never hit this path unless we explicitly
		// avoided creating a continue ladder block earlier.
		if (block_is_plain_continue(node))
			continue;

		auto *idom = node->immediate_dominator;
		auto *merge_candidate = CFGNode::find_common_post_dominator(idom, node);
		bool post_dominator_is_exit_node = merge_candidate && merge_candidate->immediate_post_dominator == merge_candidate;
		bool merged_into_terminating_path = post_dominator_is_exit_node && node->dominates_all_reachable_exits();

		// If our candidate idom post-dominates the entry block, we consider this the main path of execution.
		if (merged_into_terminating_path && idom->post_dominates(entry_block))
			merged_into_terminating_path = false;

		if (merged_into_terminating_path)
		{
			// Similar to loops, find the break target for this construct.
			auto *break_target = find_break_target_for_selection_construct(idom, node);

			if (break_target)
			{
				Vector<CFGNode *> valid = { break_target, node };
				collect_and_dispatch_control_flow(idom, break_target, valid, false, false);

				// This completely transposes the CFG, so need to recompute the CFG to keep going.
				recompute_cfg();
				return true;
			}
		}
	}

	return false;
}

bool CFGStructurizer::serialize_interleaved_merge_scopes_aggressive()
{
	for (auto *node : forward_post_visit_order)
	{
		// Eagerly collapse ridiculous unrolled loops if they exist.
		// We normally handle it, but we need to consider cases with cross-branches as well.
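		// E.g. (hypothetical): a loop with a conditional break that DXC fully unrolled
		// 32+ times leaves 32+ forward preds on the shared break target, and every
		// unrolled copy has that target on its dominance frontier, which is the shape
		// matched below.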
		constexpr size_t PredThreshold = 32;
		if (node->num_forward_preds() < PredThreshold)
			continue;
		if (block_is_plain_continue(node))
			continue;

		auto *idom = node->immediate_dominator;

		// Only consider simpler cases that we can collapse.
		if (!node->post_dominates(idom))
			continue;

		// node->forward_post_visit_order should map 1:1 to the post-visit array,
		// but in extreme circumstances where there have been inline CFG rewrites before recompute,
		// this may not be true, so be defensive.
		auto itr = std::find(forward_post_visit_order.begin(), forward_post_visit_order.end(), node);
		auto end = std::find(forward_post_visit_order.begin(), forward_post_visit_order.end(), idom);
		assert(itr != forward_post_visit_order.end());
		assert(end != forward_post_visit_order.end());

		Vector<CFGNode *> constructs;

		for (; itr != end; ++itr)
		{
			auto *candidate = *itr;
			if (candidate->num_forward_preds() < PredThreshold)
				continue;
			if (!idom->dominates(candidate))
				continue;

			auto &df = candidate->dominance_frontier;
			if (std::find(df.begin(), df.end(), node) != df.end())
				constructs.push_back(candidate);
		}

		if (constructs.empty())
			continue;

		if (constructs.size() >= 2)
			filter_serialization_candidates(constructs);
		if (constructs.empty())
			continue;

		constructs.push_back(node);
		collect_and_dispatch_control_flow(idom, node, constructs, false, true);
		recompute_cfg();
		return true;
	}

	return false;
}

Vector<std::pair<CFGNode *, CFGNode *>> CFGStructurizer::build_pdf_ranges(const Vector<CFGNode *> &candidates)
{
	Vector<std::pair<CFGNode *, CFGNode *>> pdf_ranges;
	pdf_ranges.reserve(candidates.size());

	// If breaking merge constructs are entangled, their PDFs will overlap.
	for (auto *candidate : candidates)
	{
		auto &pdf = candidate->post_dominance_frontier;
		assert(!pdf.empty());
		CFGNode *first = pdf.front();
		CFGNode *last = first;

		for (auto *n : pdf)
		{
			if (n->forward_post_visit_order > first->forward_post_visit_order)
				first = n;
			if (n->forward_post_visit_order < last->forward_post_visit_order)
				last = n;
		}

		pdf_ranges.push_back({ first, last });
	}

	return pdf_ranges;
}

bool CFGStructurizer::pdf_ranges_have_strict_dominance_ordering(const Vector<std::pair<CFGNode *, CFGNode *>> &pdf_ranges)
{
	bool need_deinterleave = false;
	auto count = pdf_ranges.size();
	for (size_t i = 0; i < count && !need_deinterleave; i++)
		for (size_t j = 0; j < count && !need_deinterleave; j++)
			if (i != j)
				need_deinterleave = is_strictly_dominance_ordered(pdf_ranges[i].first, pdf_ranges[j].first, pdf_ranges[i].second);
	return need_deinterleave;
}

void CFGStructurizer::filter_serialization_candidates(Vector<CFGNode *> &candidates) const
{
	// Ensure stable order.
	std::sort(candidates.begin(), candidates.end(), [](const CFGNode *a, const CFGNode *b) {
		return a->forward_post_visit_order < b->forward_post_visit_order;
	});

	auto *common_idom = candidates[0];
	for (size_t i = 1, n = candidates.size(); i < n; i++)
		common_idom = CFGNode::find_common_dominator(common_idom, candidates[i]);

	// Filter out false positive inner constructs.
	// If we're dominated by another inner construct, and we don't post-dominate that construct, we should yield.
	for (auto itr = candidates.begin(); itr != candidates.end(); )
	{
		bool eliminated = false;
		for (auto candidate_itr = itr + 1; candidate_itr != candidates.end(); ++candidate_itr)
		{
			bool keep_candidate = (*candidate_itr) == common_idom ||
			                      !(*candidate_itr)->dominates(*itr) ||
			                      (*itr)->post_dominates(*candidate_itr);

			// Don't let the common idom of constructs consume subsequent constructs.
			if (!keep_candidate)
			{
				// To accept a dominator, we don't want any common idom removing every node.
				std::move(itr + 1, candidates.end(), itr);
				candidates.pop_back();
				eliminated = true;
				break;
			}
		}

		if (!eliminated)
			++itr;
	}

	Vector<CFGNode *> valid_constructs;

	// Prune any candidate that can reach another candidate. The sort ensures that the candidate to be removed comes last.
	size_t count = candidates.size();
	for (size_t i = 0; i < count; i++)
	{
		bool valid = true;
		for (size_t j = 0; j < i; j++)
		{
			if (query_reachability(*candidates[i], *candidates[j]))
			{
				valid = false;
				break;
			}
		}

		// Another sanity check for candidates, the idom must be able to reach the other nodes.
		if (valid)
		{
			valid = false;
			for (size_t j = 0; j < count; j++)
			{
				if (i == j)
					continue;

				if (query_reachability(*candidates[i]->immediate_dominator, *candidates[j]))
				{
					valid = true;
					break;
				}
			}
		}

		if (valid)
			valid_constructs.push_back(candidates[i]);
	}

	candidates = std::move(valid_constructs);
}

bool CFGStructurizer::serialize_interleaved_merge_scopes()
{
	// Try to fix up scenarios which arise from unrolled loops with multiple break blocks.
	// DXC will emit maximal convergence and force all dynamic instances of a given break to branch to the same
	// block, which then breaks, e.g.:
	// for (int i = 0; i < CONSTANT; i++) { cond_break_construct1(); cond_break_construct2(); cond_break_construct3(); }
	// When this unrolls we can end up with merge blocks which are entangled. The only sane way to make this work
	// is to serialize the breaks to after the merge block.
	UnorderedSet<CFGNode *> potential_merge_nodes;
	for (auto *node : forward_post_visit_order)
		if (node->num_forward_preds() >= 2 && !block_is_plain_continue(node))
			potential_merge_nodes.insert(node);

	UnorderedSet<CFGNode *> visited;

	for (auto *node : forward_post_visit_order)
	{
		if (node->num_forward_preds() <= 1)
			continue;
		if (block_is_plain_continue(node))
			continue;

		auto *idom = node->immediate_dominator;
		Vector<CFGNode *> complex_inner_constructs;
		Vector<CFGNode *> constructs;

		// Find merge block candidates that are strictly dominated by idom and immediately post-dominated by node.
		// They also must not be good merge candidates on their own.
		// Also, we're not interested in any loop merge candidates.
		for (auto *candidate : potential_merge_nodes)
		{
			if (candidate != idom && idom->dominates(candidate) && node->post_dominates(candidate) &&
			    !candidate->post_dominates_perfect_structured_construct() &&
			    get_innermost_loop_header_for(idom, candidate) == idom)
			{
				bool direct_dominance_frontier = candidate->dominance_frontier.size() == 1 &&
				                                 candidate->dominance_frontier.front() == node;

				// The candidate must not try to merge to other code since we might end up introducing loops that way.
				// All code reachable by candidate must cleanly break to node.
				// We can make use of a simpler rewrite path if all code paths to node go through our candidates.

				// Accept a construct and determine if we need to promote the complex constructs instead of the inner constructs.
				// The inner construct may just be a false positive that ends up blocking the rewrite.
				if (direct_dominance_frontier)
					constructs.push_back(candidate);
				else
					complex_inner_constructs.push_back(candidate);
			}
		}

		// If true, we need a complex rewrite. This means taking unrelated branches to node and fusing them into
		// one big merge. This requires very simple control flow from the candidates,
		// since otherwise we end up with unintended loops in the rewrite.
		// The simplified flow requires that all code paths from idom flow through the complex inner candidates.
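		// For intuition (hypothetical): complex candidates {X, Y} must each reach the
		// single direct candidate, the direct candidate must not post-dominate either
		// of them, and every path from their common idom down to node must be blocked
		// by {X, Y}; only then is promoting the complex constructs accepted below.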
		bool collect_all_paths_to_pdom = true;

		if (constructs.size() == 1 && complex_inner_constructs.size() >= 2)
		{
			auto *candidate_inner = constructs.front();
			auto *common_idom = candidate_inner;
			constructs.clear();

			// Try to detect a false positive where we should ignore inner_constructs.
			// Ensure that the inner construct comes after the candidate constructs.
			bool should_promote_complex = true;
			for (auto *candidate : complex_inner_constructs)
			{
				if (!query_reachability(*candidate, *candidate_inner))
				{
					should_promote_complex = false;
					break;
				}
			}

			if (should_promote_complex)
			{
				// The inner candidate should not post-dominate any other candidate block.
				// We're looking for unusual merge patterns here.
				for (auto *pred : complex_inner_constructs)
				{
					if (candidate_inner->post_dominates(pred))
					{
						should_promote_complex = false;
						break;
					}
				}
			}

			if (should_promote_complex)
			{
				// In complex merges, we focus on merging as early as possible, rather than as late as possible.
				// Remove any candidates which are reachable by other candidates.
				// Disregard the inner constructs, promote the complex ones.
				collect_all_paths_to_pdom = false;

				// Ensure stable order.
				std::sort(complex_inner_constructs.begin(), complex_inner_constructs.end(),
				          [](const CFGNode *a, const CFGNode *b) {
					          return a->forward_post_visit_order > b->forward_post_visit_order;
				          });

				size_t count = complex_inner_constructs.size();
				for (size_t j = 0; j < count; j++)
				{
					bool is_reachable = false;
					for (size_t i = 0; i < j && !is_reachable; i++)
						if (query_reachability(*complex_inner_constructs[i], *complex_inner_constructs[j]))
							is_reachable = true;

					if (!is_reachable)
						constructs.push_back(complex_inner_constructs[j]);
				}
			}

			if (should_promote_complex && constructs.size() >= 2)
			{
				for (auto *inner : constructs)
					common_idom = CFGNode::find_common_dominator(common_idom, inner);

				// Verify that all paths to node must go through the inner constructs.
				// We cannot handle more awkward merges.
				should_promote_complex = !node->can_backtrace_to_with_blockers(common_idom, constructs);
			}

			if (!should_promote_complex)
				continue;
		}

		if (constructs.size() < 2)
			continue;
		filter_serialization_candidates(constructs);
		if (constructs.size() < 2)
			continue;

		auto pdf_ranges = build_pdf_ranges(constructs);
		bool need_deinterleave = pdf_ranges_have_strict_dominance_ordering(pdf_ranges);
		size_t count = constructs.size();
		CFGNode *common_anchor = nullptr;

		if (!need_deinterleave)
		{
			// Detect a complicated pattern that comes up which looks a lot like interleaved merges, but isn't really.
			// A       B
			// |\     /|
			// | \   / |
			// |   E   |
			// | /   \ |
			// C       D
			//  \     /
			//   \   /
			//     F
			// Candidates: {C, D}
			// Where {A, E} is pdf range of C
			// and {B, E} is pdf range of D.
			// The last PDF can be considered a merge anchor that distributes code further.
			// E must have {C, D} - and only those - in the dominance frontier.
			common_anchor = pdf_ranges[0].second;
			bool can_be_anchor = common_anchor->pred.size() >= 2 ||
			                     (common_anchor->pred.size() == 1 && common_anchor->pred.front()->succ_back_edge);

			need_deinterleave = common_anchor->dominance_frontier.size() == count &&
			                    common_anchor->succ.size() == count &&
			                    common_anchor->ir.terminator.type == Terminator::Type::Condition &&
			                    can_be_anchor;

			for (size_t i = 0; i < count && need_deinterleave; i++)
			{
				need_deinterleave = query_reachability(*pdf_ranges[i].first, *pdf_ranges[i].second) &&
				                    pdf_ranges[0].second == pdf_ranges[i].second;
				need_deinterleave = need_deinterleave &&
				                    std::find(common_anchor->dominance_frontier.begin(),
				                              common_anchor->dominance_frontier.end(),
				                              constructs[i]) != common_anchor->dominance_frontier.end();
			}

			if (!need_deinterleave)
				common_anchor = nullptr;
		}

		if (!need_deinterleave)
		{
			const CFGNode *interleaved_exit_loop = nullptr;

			// Try finding interleaved loop exits. An extremely rare and awkward scenario.
			// This pattern makes it so that loop resolves cannot work well since nothing ends up being nested.
			// We can deal with one, but if two or more loops end up with awkward resolves, we have to employ magic.

			// First, look at the PDFs, try to find a node in an inner loop.
			// If the loops exit in a way where they can both reach the interleaving candidates,
			// that's a scenario where we need to consider rewriting.
			for (auto *candidate : constructs)
			{
				auto &pdf = candidate->post_dominance_frontier;
				for (auto *pdf_candidate : pdf)
				{
					auto *inner_header = get_innermost_loop_header_for(idom, pdf_candidate);
					if (inner_header != idom && inner_header != interleaved_exit_loop)
					{
						// Don't allow nested loops to be considered as two loops.
						if (interleaved_exit_loop && query_reachability(*inner_header, *interleaved_exit_loop))
							continue;

						if (query_reachability(*pdf_candidate, *inner_header->pred_back_edge))
						{
							// The back-edge can only reach one of the interleave nodes, while the candidate PDF
							// can reach both. This proves weird break cases.
							unsigned back_edge_reach_count = 0;
							unsigned pdf_reach_count = 0;

							for (auto *reach_candidate : constructs)
							{
								if (query_reachability(*inner_header->pred_back_edge, *reach_candidate))
									back_edge_reach_count++;
								if (query_reachability(*pdf_candidate, *reach_candidate))
									pdf_reach_count++;
							}

							if (back_edge_reach_count == 1 && pdf_reach_count == constructs.size())
							{
								// We've found two candidates now, break out.
								need_deinterleave = interleaved_exit_loop != nullptr;
								interleaved_exit_loop = inner_header;
								break;
							}
						}
					}
				}

				if (need_deinterleave)
					break;
			}
		}

		if (!need_deinterleave && count >= 3)
		{
			// More special cases.
			// We might not find an interleaving scenario by looking at strict dominance,
			// but there might be difficult cases lurking if we look at pure reachability.
			for (size_t i = 0; i < count && !need_deinterleave; i++)
			{
				for (size_t j = 0; j < count && !need_deinterleave; j++)
				{
					if (i == j)
						continue;
					if (!is_reachability_ordered(pdf_ranges[i].first, pdf_ranges[j].first, pdf_ranges[i].second))
						continue;

					auto &df = pdf_ranges[i].second->dominance_frontier;
					bool all_in_frontier = true;

					// If all the valid constructs are in the dominance frontier, consider this a highly difficult case.
					// If there's just two candidate blocks we can resolve them with ladder breaks, but three and above
					// can be nested in unexpected ways. This threshold is mostly a heuristic to avoid
					// doing complex transforms unless we really know for sure we need them.
                    for (size_t k = 0; k < count && all_in_frontier; k++)
                        all_in_frontier = std::find(df.begin(), df.end(), constructs[k]) != df.end();
                    need_deinterleave = all_in_frontier;
                }
            }
        }

        if (!need_deinterleave && pdf_ranges[0].first != pdf_ranges[0].second)
        {
            // Special case of the above. If the PDFs overlap exactly we have criss-cross merge patterns.
            // Be very conservative when we accept this since this pattern also comes up as innocent
            // breaking patterns. A complicating factor is when the idom is a loop header and
            // we don't post-dominate the idom, which more likely indicates a breaking path.
            bool same_pdfs = true;
            for (size_t i = 1; i < count && same_pdfs; i++)
                same_pdfs = pdf_ranges[i].first == pdf_ranges[0].first && pdf_ranges[i].second == pdf_ranges[0].second;

            // Heuristic to avoid doing needless rewrites.
            // The issues only seem to manifest in this situation.
            // Likely the problem is that different idoms with wrong ladder resolve order
            // can lead to backwards branches in some extremely rare cases ...
            auto *first = pdf_ranges[0].first->immediate_dominator;
            auto *second = pdf_ranges[0].second->immediate_dominator;
            auto *common_idom = CFGNode::find_common_dominator(first, second);
            bool crossing_idoms = first != second && first != common_idom && second != common_idom &&
                                  (query_reachability(*first, *second) || query_reachability(*second, *first));

            if (same_pdfs && crossing_idoms)
            {
                // All PDFs must have all candidates in their DFs.
                bool all_in_frontier = true;

                // If all the valid constructs are in the dominance frontier, consider this a highly difficult case.
                // If there are just two candidate blocks we can resolve them with ladder breaks, but three and above
                // can be nested in unexpected ways. This threshold is mostly a heuristic to avoid
                // doing complex transforms unless we really know for sure we need them.
                const Vector<CFGNode *> *dfs[] = {
                    &pdf_ranges[0].first->dominance_frontier,
                    &pdf_ranges[0].second->dominance_frontier,
                };

                for (auto *df : dfs)
                    for (size_t i = 0; i < count && all_in_frontier; i++)
                        all_in_frontier = std::find(df->begin(), df->end(), constructs[i]) != df->end();

                need_deinterleave = all_in_frontier;
            }
        }

        if (need_deinterleave)
        {
            if (common_anchor)
                collect_and_dispatch_control_flow_from_anchor(common_anchor, constructs);
            else
                collect_and_dispatch_control_flow(idom, node, constructs, collect_all_paths_to_pdom, false);

            // This completely transposes the CFG, so need to recompute CFG to keep going.
            recompute_cfg();
            return true;
        }
    }

    return false;
}

void CFGStructurizer::split_merge_scopes()
{
    for (auto *node : forward_post_visit_order)
    {
        // Setup a preliminary merge scope so we know when to stop traversal.
        // We don't care about traversing inner scopes, nor starting from the merge block itself.
        if (node->num_forward_preds() <= 1)
            continue;
        if (block_is_plain_continue(node))
            continue;

        // The idom is the natural header block.
        auto *idom = node->immediate_dominator;
        assert(idom->succ.size() >= 2);

        if (idom->merge == MergeType::None)
        {
            idom->merge = MergeType::Selection;
            idom->selection_merge_block = node;
        }

        node->headers.push_back(idom);
    }

    for (auto *node : forward_post_visit_order)
    {
        if (node->num_forward_preds() <= 1)
            continue;

        // Continue blocks can always be branched to, from any scope, so don't rewrite anything here.
        if (node->succ_back_edge)
            continue;

        // The idom is the natural header block.
        auto *idom = node->immediate_dominator;
        assert(idom->succ.size() >= 2);

        // We already rewrote this selection construct in serialize_interleaved_merge_scopes.
        // Don't try to introduce unnecessary ladders.
        if (idom->merge == MergeType::Loop && idom->loop_merge_block == node)
            continue;

        // If we find a construct which is a typical entry <-> exit scenario, do not attempt to rewrite
        // any branches. The real merge block might be contained inside this construct, and this block merely
        // serves as the exit merge point. It should generally turn into a loop merge later.
        if (header_and_merge_block_have_entry_exit_relationship(idom, node))
            continue;

        // Now we want to deal with cases where we are using this selection merge block as a "goto" target
        // for inner selection constructs. Using a loop header might be possible,
        // but we will need to split up blocks to make sure that we don't end up with headers where the only branches
        // are either merges or breaks.
        // This case is relevant when we have something like:
        // A -> B -> C -> D -> M
        // A -> M
        // B -> M
        // C -> M
        // D -> M
        // We'll need intermediate blocks which merge each layer of the selection "onion".
        rewrite_selection_breaks(idom, node);
    }
}

bool CFGStructurizer::query_reachability(const CFGNode &from, const CFGNode &to) const
{
    if (&from == &to)
        return true;

    const uint32_t *src_reachability = &reachability_bitset[from.forward_post_visit_order * reachability_stride];
    return (src_reachability[to.forward_post_visit_order / 32] & (1u << (to.forward_post_visit_order & 31u))) != 0;
}

void CFGStructurizer::visit_reachability(const CFGNode &node)
{
    uint32_t *dst_reachability = &reachability_bitset[node.forward_post_visit_order * reachability_stride];

    for (auto *succ : node.succ)
    {
        // Inherit reachability from all successors.
        const uint32_t *src_reachability = &reachability_bitset[succ->forward_post_visit_order * reachability_stride];
        for (unsigned i = 0; i < reachability_stride; i++)
            dst_reachability[i] |= src_reachability[i];
    }

    // We can reach ourselves.
    dst_reachability[node.forward_post_visit_order / 32] |= 1u << (node.forward_post_visit_order & 31u);
}

void CFGStructurizer::build_reachability()
{
    reachability_stride = (forward_post_visit_order.size() + 31) / 32;
    reachability_bitset.clear();
    reachability_bitset.resize(reachability_stride * forward_post_visit_order.size());
    for (auto *node : forward_post_visit_order)
        visit_reachability(*node);
}

void CFGStructurizer::recompute_cfg()
{
    reset_traversal();
    visit(*entry_block);

    // Need to prune dead preds before computing dominance.
    prune_dead_preds();
    build_immediate_dominators();
    build_reachability();

    backwards_visit();
    build_immediate_post_dominators();

    compute_dominance_frontier();
    compute_post_dominance_frontier();
}

CFGNode *CFGStructurizer::find_natural_switch_merge_block(CFGNode *node, CFGNode *post_dominator) const
{
    // Maintain the original switch block order if possible to avoid awkward churn in reference output.
    uint64_t order = 1;
    for (auto &c : node->ir.terminator.cases)
    {
        // We'll need to decrement global order up to N times in the worst case.
        // Use 64-bit here as a safeguard in case the module is using a ridiculous number of case labels.
        c.global_order = order * node->ir.terminator.cases.size();
        order++;
    }

    // First, sort so that any fallthrough parent comes before fallthrough target.
    std::sort(node->ir.terminator.cases.begin(), node->ir.terminator.cases.end(),
              [](const Terminator::Case &a, const Terminator::Case &b) {
                  return a.node->forward_post_visit_order > b.node->forward_post_visit_order;
              });

    // Look at all potential fallthrough candidates and reassign global order.
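    // A fallthrough parent adopts (child order - 1), so the stable sort below places it
    // immediately before its fallthrough target while leaving unrelated cases untouched.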
    for (size_t i = 0, n = node->ir.terminator.cases.size(); i < n; i++)
    {
        for (size_t j = i + 1; j < n; j++)
        {
            auto &parent = node->ir.terminator.cases[i];
            auto &child = node->ir.terminator.cases[j];

            // A case label might be the merge block candidate of the switch.
            // Don't consider case fallthrough if the child post-dominates the entire switch statement.
            // If a case label is a continue block, ignore it, since it will be a pure continue break in this scenario.
            // This is not considered a fallthrough, just a common break.
            if (child.node != post_dominator && parent.node != child.node &&
                !(child.node->succ_back_edge || child.node->is_pseudo_back_edge) &&
                query_reachability(*parent.node, *child.node))
            {
                parent.global_order = child.global_order - 1;
                break;
            }
        }
    }

    // Sort again, but this time, by global order.
    std::stable_sort(node->ir.terminator.cases.begin(), node->ir.terminator.cases.end(),
                     [](const Terminator::Case &a, const Terminator::Case &b) {
                         return a.global_order < b.global_order;
                     });

    // Detect impossible fallthrough scenarios. We can have A -> B -> C fallthrough, but not
    // A -> C and B -> C. In this situation, we should see C as the actual switch merge block,
    // and rewrite the switch to loop + switch.
    // Detect this by having two entries with identical global order.
    bool has_impossible_fallthrough = false;
    uint64_t target_order = 0;
    for (size_t i = 1, n = node->ir.terminator.cases.size(); i < n; i++)
    {
        if (node->ir.terminator.cases[i].global_order == node->ir.terminator.cases[i - 1].global_order)
        {
            target_order = node->ir.terminator.cases[i].global_order + 1;
            has_impossible_fallthrough = true;
            break;
        }
    }

    CFGNode *candidate = nullptr;

    if (has_impossible_fallthrough)
    {
        for (auto &c : node->ir.terminator.cases)
        {
            if (c.global_order == target_order)
            {
                // Pick the earliest one.
                candidate = c.node;
                break;
            }
        }
    }

    bool case_labels_can_be_candidate_frontier = false;
    if (has_impossible_fallthrough && !candidate)
    {
        // This can happen if the impossible candidate block is a pred of yet another case label ?!?!
        // If this happens, do the full analysis in the loop below.
        case_labels_can_be_candidate_frontier = true;
    }

    // We found a candidate, but there might be multiple candidates which are considered impossible.
    // If two case labels merge execution before the candidate merge, we should consider that the natural merge,
    // since it is not possible to express this without a switch merge.
    for (auto &c : node->ir.terminator.cases)
    {
        for (auto *front : c.node->dominance_frontier)
        {
            // Never consider continue constructs here.
            if (front->succ_back_edge || front->is_pseudo_back_edge)
                continue;

            if (!case_labels_can_be_candidate_frontier)
            {
                // Ignore frontiers that are other case labels.
                // We allow simple fallthrough, and if we found an impossible case we would have handled it already.
                for (auto &ic : node->ir.terminator.cases)
                {
                    if (ic.node == front)
                    {
                        front = nullptr;
                        break;
                    }
                }
            }

            if (!front)
                continue;

            if (!post_dominator || (front->forward_post_visit_order != post_dominator->forward_post_visit_order &&
                                    query_reachability(*front, *post_dominator)))
            {
                // If this is reachable by a different case label, we have a winner. This must be a fake fallthrough
                // that we should promote to switch merge.
                for (auto &ic : node->ir.terminator.cases)
                {
                    if (ic.node != c.node && query_reachability(*ic.node, *front))
                    {
                        // Select the innermost block that is impossible.
                        // Breaking further out can be handled with loops, etc.
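                        // A larger forward post-visit order means the block is visited earlier in
                        // the forward traversal, i.e. the tighter, more inner merge scope.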
                        if (!candidate || front->forward_post_visit_order > candidate->forward_post_visit_order)
                            candidate = front;
                    }
                }
            }
        }
    }

    return candidate ? candidate : post_dominator;
}

CFGNode *CFGStructurizer::create_switch_merge_ladder(CFGNode *header, CFGNode *merge)
{
    // We did not rewrite switch blocks w.r.t. selection breaks.
    // We might be in a situation where the switch block is trying to merge to a block which is already being merged to.
    // Create a ladder which the switch block could merge to.
    return create_ladder_block(header, merge, ".switch-merge");
}

Operation *CFGStructurizer::build_switch_case_equal_check(const CFGNode *header, CFGNode *insert_node,
                                                          const Terminator::Case &c)
{
    Operation *ieq;

    if (c.is_default)
    {
        // Awkward since we have to compare all other case labels.
        Operation *neq_and = nullptr;
        for (auto &label : header->ir.terminator.cases)
        {
            if (!label.is_default)
            {
                Operation *neq = module.allocate_op(spv::OpINotEqual, module.allocate_id(),
                                                    module.get_builder().makeBoolType());
                neq->add_id(header->ir.terminator.conditional_id);
                neq->add_id(module.get_builder().makeUintConstant(label.value));
                insert_node->ir.operations.push_back(neq);

                if (neq_and)
                {
                    Operation *and_op = module.allocate_op(spv::OpLogicalAnd, module.allocate_id(),
                                                           module.get_builder().makeBoolType());
                    and_op->add_id(neq_and->id);
                    and_op->add_id(neq->id);
                    insert_node->ir.operations.push_back(and_op);
                    neq_and = and_op;
                }
                else
                {
                    neq_and = neq;
                }
            }
        }
        ieq = neq_and;
    }
    else
    {
        ieq = module.allocate_op(spv::OpIEqual, module.allocate_id(), module.get_builder().makeBoolType());
        ieq->add_id(header->ir.terminator.conditional_id);
        ieq->add_id(module.get_builder().makeUintConstant(c.value));
        insert_node->ir.operations.push_back(ieq);
    }

    return ieq;
}

void CFGStructurizer::hoist_switch_branches_to_frontier(CFGNode *node, CFGNode *merge,
                                                        CFGNode *dominance_frontier_candidate)
{
    // Dispatch to the dominance frontier before we enter switch scope.
    auto *pred = create_helper_pred_block(node);
    std::swap(pred->ir.operations, node->ir.operations);

    auto succs = node->succ;
    for (auto *succ : succs)
    {
        if (!query_reachability(*succ, *dominance_frontier_candidate))
            continue;

        // Rewrite the case label to reach merge block in a unique path.
        // That way we can PHI select whether to branch to dominance frontier or not
        // in the switch merge block.
        spv::Id cond_id = 0;
        for (auto &c : node->ir.terminator.cases)
        {
            if (c.node == succ)
            {
                auto *ieq = build_switch_case_equal_check(node, pred, c);
                if (cond_id)
                {
                    auto *bor = module.allocate_op(spv::OpLogicalOr, module.allocate_id(),
                                                   module.get_builder().makeBoolType());
                    bor->add_id(cond_id);
                    bor->add_id(ieq->id);
                    pred->ir.operations.push_back(bor);
                    cond_id = bor->id;
                }
                else
                {
                    cond_id = ieq->id;
                }
            }
        }

        if (succ == dominance_frontier_candidate)
        {
            // We're directly branching to target, so might have to rewrite PHI incoming
            // block to pred helper block instead.
            for (auto &phi : dominance_frontier_candidate->ir.phi)
                for (auto &incoming : phi.incoming)
                    if (incoming.block == node)
                        incoming.block = pred;
        }

        for (auto *&p : succ->pred)
            if (p == node)
                p = pred;

        for (auto &c : node->ir.terminator.cases)
            if (c.node == succ)
                c.node = merge;

        node->succ.erase(std::find(node->succ.begin(), node->succ.end(), succ));
        node->add_branch(merge);
        pred->add_branch(succ);

        // Make sure that our selection branch has somewhere to merge if it has to.
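        // Routing a direct branch to the frontier through an intermediate node below gives the
        // new selection construct a dedicated block to branch to instead of the frontier itself.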
        if (succ == dominance_frontier_candidate)
        {
            succ = pred->rewrite_branch_through_intermediate_node(dominance_frontier_candidate,
                                                                  dominance_frontier_candidate);
        }

        pred->ir.terminator.type = Terminator::Type::Condition;
        pred->ir.terminator.conditional_id = cond_id;
        pred->ir.terminator.true_block = succ;
        pred->ir.terminator.false_block = node;
        pred->ir.terminator.direct_block = nullptr;

        // Have to assume that there is only one path to this frontier,
        // otherwise we're in a world of impossible case merges
        // which should have been handled elsewhere ...
        return;
    }
}

CFGStructurizer::SwitchProgressMode CFGStructurizer::process_switch_blocks(unsigned pass)
{
    bool modified_cfg = false;

    for (auto index = forward_post_visit_order.size(); index; index--)
    {
        auto *node = forward_post_visit_order[index - 1];
        if (node->ir.terminator.type != Terminator::Type::Switch)
            continue;

        auto *merge = find_common_post_dominator(node->succ);
        auto *natural_merge = find_natural_switch_merge_block(node, merge);

        // If there are early exits inside the switch statement, post-dominance analysis won't work.
        // Just pick the natural merge.
        // This only seems to happen in dxbc2dxil.
        if (!merge)
            merge = natural_merge;

        // If there is still nothing, it's possible one of the case labels is the only non-exiting path.
        // If we have no natural merge either, this is the likely merge point.
        if (!merge)
        {
            CFGNode *pdom = nullptr;
            for (auto *succ : node->succ)
            {
                if (!succ->dominates_all_reachable_exits())
                {
                    if (!pdom)
                    {
                        pdom = succ;
                    }
                    else
                    {
                        auto *new_pdom = CFGNode::find_common_post_dominator(pdom, succ);
                        if (new_pdom)
                            pdom = new_pdom;
                    }

                    // If there is at least one exit, have a fallback.
                    merge = succ;
                    natural_merge = succ;
                }
            }

            // If we have a valid pdom, that is the more reasonable target.
            if (pdom)
            {
                merge = pdom;
                natural_merge = pdom;
            }
        }

        if (!merge)
        {
            // Merge to unreachable.
            node->merge = MergeType::Selection;
            continue;
        }

        if (node->freeze_structured_analysis && node->merge == MergeType::Selection)
        {
            natural_merge = node->selection_merge_block;
        }
        else if (pass == 0)
        {
            // It is possible that we don't necessarily want to merge to the post-dominator.
            // There might be inner constructs which are better suited.
            // This can happen if some branches break farther out than some other branches.
            // We should let the loop ladder system take care of that.
            // The switch merge should consume the smallest possible scope.
            if (merge != natural_merge)
            {
                CFGNode *inner_merge = merge;
                for (auto *frontier_node : natural_merge->dominance_frontier)
                {
                    if (node->dominates(frontier_node) && merge->post_dominates(frontier_node) &&
                        frontier_node->forward_post_visit_order > inner_merge->forward_post_visit_order)
                    {
                        inner_merge = frontier_node;
                    }
                }

                if (merge != inner_merge && inner_merge != natural_merge && node->dominates(merge))
                {
                    // If node dominates the merge, it's important that node remains a header block.
                    // If we have an inner merge, we need to transpose the control flow so that
                    // we avoid the inner merge altogether.
                    Vector<CFGNode *> constructs = { natural_merge };
                    for (auto *pred : inner_merge->pred)
                        if (!query_reachability(*pred, *natural_merge) && !query_reachability(*natural_merge, *pred))
                            constructs.push_back(pred);

                    if (constructs.size() >= 2)
                    {
                        collect_and_dispatch_control_flow(node, merge, constructs, false, false);
                        return SwitchProgressMode::IterativeModify;
                    }
                }

                merge = inner_merge;

                // Relying on loop ladder system might not be possible in all situations.
                // It's possible that the switch block is also a loop header for example.
                // Need to transpose the code with a ladder to avoid impossible problems later.
                if (node->pred_back_edge)
                    natural_merge = transpose_code_path_through_ladder_block(node, natural_merge, inner_merge);
            }
            else if (merge && !node->dominates(merge))
            {
                CFGNode *dominance_frontier_candidate = nullptr;

                // If we have a normal merge scenario (merge == natural_merge),
                // there might still be breaks which can reach the switch merge block.
                // This can happen if a switch block is in an if() {} block, and
                // one of the case labels branches to the else() block. Both the switch and else() block
                // reconvene later, which means that we should hoist the break so it's not contained
                // in switch scope.
                for (auto *frontier : node->dominance_frontier)
                {
                    if (frontier->forward_post_visit_order != merge->forward_post_visit_order &&
                        query_reachability(*frontier, *merge))
                    {
                        // Uncertain if we can deal with this.
                        // Multiple nested branches perhaps?
                        if (dominance_frontier_candidate)
                            LOGW("Multiple candidates for switch break transposition.\n");
                        dominance_frontier_candidate = frontier;
                    }
                }

                if (dominance_frontier_candidate)
                    hoist_switch_branches_to_frontier(node, merge, dominance_frontier_candidate);
            }

            bool can_merge_to_post_dominator = merge && node->dominates(merge) && merge->headers.empty();

            // Need to guarantee that we can merge somewhere.
            // If possible we want to make it so that by creating a ladder,
            // we change the post-dominator to something we dominate.
            // For this to work, the dominance frontier of node must only contain the merge node.
            if (merge != natural_merge && !can_merge_to_post_dominator &&
                node->dominance_frontier.size() == 1 && node->dominance_frontier.front() == merge)
            {
                merge = create_switch_merge_ladder(node, merge);
                assert(node->dominates(merge));
                modified_cfg = true;
                can_merge_to_post_dominator = true;
            }

            // Need to rewrite the switch if we're not already a loop header.
            if (merge != natural_merge && can_merge_to_post_dominator && !node->pred_back_edge)
            {
                auto *switch_outer = create_helper_pred_block(node);
                switch_outer->merge = MergeType::Loop;
                switch_outer->loop_merge_block = merge;
                switch_outer->freeze_structured_analysis = true;
                merge->headers.push_back(switch_outer);

                // Shouldn't be needed (I believe), but spirv-val is a bit temperamental when double breaking
                // straight out of a switch block in some situations,
                // so try not to ruffle too many feathers.
                if (std::find(node->succ.begin(), node->succ.end(), natural_merge) != node->succ.end())
                {
                    auto *dummy_case = pool.create_node();
                    dummy_case->name = natural_merge->name + ".pred";
                    dummy_case->immediate_dominator = node;
                    dummy_case->immediate_post_dominator = natural_merge;
                    dummy_case->forward_post_visit_order = node->forward_post_visit_order;
                    dummy_case->backward_post_visit_order = node->backward_post_visit_order;
                    dummy_case->ir.terminator.type = Terminator::Type::Branch;
                    dummy_case->ir.terminator.direct_block = natural_merge;
                    dummy_case->add_branch(natural_merge);
                    node->retarget_branch(natural_merge, dummy_case);
                }

                node->freeze_structured_analysis = true;
            }

            // Switch case labels must be contained within the switch statement.
            // Use a dummy label if we have to.
            auto succs = node->succ;
            for (auto *succ : succs)
            {
                bool need_fixup = false;
                if (succ == merge)
                {
                    if (merge != natural_merge)
                    {
                        // If we used outer shell method, we dominate merge,
                        // but not structurally, since there's a loop merge already.
                        need_fixup = can_merge_to_post_dominator;
                    }
                    else
                    {
                        // If this happens we are our own outer shell.
                        // The node itself is both a loop header *and* switch header,
                        // so similar analysis applies.
                        // Only consider fixup if we cannot reach continue block.
                        // This can still be a normal inner merge for the switch, which then branches to continue block.
                        need_fixup = node->pred_back_edge != nullptr &&
                                     !query_reachability(*succ, *node->pred_back_edge);
                    }
                }
                else
                {
                    // If we don't dominate succ, but it's not the common merge block, this is
                    // an edge case we have to handle as well.
                    // We might dominate a continue block, but these actually belong to outer loop scope.
                    need_fixup = !node->dominates(succ) || succ->succ_back_edge;
                }

                // Guard against duplicate label branches.
                bool has_succ = std::find(node->succ.begin(), node->succ.end(), succ) != node->succ.end();

                if (need_fixup && has_succ)
                {
                    auto *dummy_break = pool.create_node();
                    dummy_break->name = node->name + (succ->succ_back_edge ? ".continue" : ".break");
                    dummy_break->immediate_dominator = node;
                    dummy_break->immediate_post_dominator = succ;
                    dummy_break->forward_post_visit_order = node->forward_post_visit_order;
                    dummy_break->backward_post_visit_order = node->backward_post_visit_order;
                    dummy_break->ir.terminator.type = Terminator::Type::Branch;
                    dummy_break->ir.terminator.direct_block = succ;
                    dummy_break->is_pseudo_back_edge = succ->succ_back_edge != nullptr;
                    dummy_break->add_branch(succ);
                    node->retarget_branch(succ, dummy_break);
                }
            }
        }

        merge = natural_merge;
        CFGNode *merge_ladder = nullptr;

        // We cannot rewrite the CFG in pass 1 safely, this should have happened in pass 0.
        if (pass == 0 && (!node->dominates(merge) || block_is_plain_continue(merge)))
        {
            merge_ladder = create_switch_merge_ladder(node, merge);
            merge = find_common_post_dominator(node->succ);

            // If there are early-exits, the pdom may be nullptr. Safeguard against this.
            // This only seems to happen in dxbc2dxil.
            if (!merge)
                merge = merge_ladder;
            modified_cfg = true;
        }

        if (node->dominates(merge))
        {
            //LOGI("Switch merge: %p (%s) -> %p (%s)\n", static_cast<const void *>(node), node->name.c_str(),
            //     static_cast<const void *>(merge), merge->name.c_str());
            node->merge = MergeType::Selection;
            // There is a small chance that this is supposed to be a loop merge target.
            // We'll fix that up later if needed. In that case, the switch block will merge to unreachable.
            node->selection_merge_block = merge;
            merge->add_unique_header(node);
        }
        else
        {
            // We got a switch block where someone is escaping. Similar idea as for loop analysis.
            // Find a post-dominator where we ignore branches which are "escaping".
            auto *dominated_merge_target = find_common_post_dominator_with_ignored_break(node->succ, merge);
            if (!dominated_merge_target)
            {
                LOGW("No dominated merge target found. Likely a bug. Falling back to merge ladder.\n");
                dominated_merge_target = merge_ladder;
            }

            assert(dominated_merge_target);
            if (node->dominates(dominated_merge_target))
            {
                node->merge = MergeType::Selection;
                node->selection_merge_block = merge;
                dominated_merge_target->add_unique_header(node);
                merge->add_unique_header(node);
            }
        }

        // A switch header might also be a loop header. Create a helper succ block for this case.
        if (pass == 0 && node->pred_back_edge)
        {
            node = create_helper_succ_block(node);
            modified_cfg = true;
        }
    }

    return modified_cfg ? SwitchProgressMode::SimpleModify : SwitchProgressMode::Done;
}

bool CFGStructurizer::merge_candidate_is_inside_continue_construct(const CFGNode *node) const
{
    // If we've reached the continue construct, we cannot merge away from that construct.
    // Any such merge must be eliminated.
    // We can know this for certain if the succ of node
    // post dominates the entire loop construct, since that node is the obvious merge node.
    assert(node->succ.size() == 1);
    for (auto *pred : node->pred)
    {
        if (pred->succ_back_edge &&
            node->succ.front()->post_dominates(pred->succ_back_edge) &&
            pred->succ_back_edge->dominates(node->succ.front()) &&
            !pred->dominates(node))
        {
            return true;
        }
    }

    return false;
}

bool CFGStructurizer::merge_candidate_is_on_breaking_path(const CFGNode *node) const
{
    return node->pred.size() >= 2 && node->succ.size() == 1 &&
           !node->dominates(node->succ.front()) &&
           node->succ.front()->post_dominates(node) &&
           control_flow_is_escaping(node, node->succ.front()) &&
           !node->post_dominates_perfect_structured_construct();
}

void CFGStructurizer::find_selection_merges(unsigned pass)
{
    for (auto *node : forward_post_visit_order)
    {
        if (node->num_forward_preds() <= 1)
            continue;

        // Never merge to continue block.
        // We should never hit this path unless we explicitly
        // avoided creating a continue ladder block earlier.
        if (block_is_plain_continue(node))
            continue;

        // If there are 2 or more pred edges, try to merge execution.
        // The idom is the natural header block.
        auto *idom = node->immediate_dominator;
        assert(idom->succ.size() >= 2);

        // Check for case fallthrough here. In this case, we do not have a merge scenario, just ignore.
        auto *inner_header = node->get_outer_selection_dominator();
        if (inner_header && inner_header->ir.terminator.type == Terminator::Type::Switch)
        {
            if (inner_header->selection_merge_block == node)
            {
                // We just found a switch block which we have already handled.
                continue;
            }

            if (std::find(inner_header->succ.begin(), inner_header->succ.end(), node) != inner_header->succ.end())
            {
                // Fallthrough.
                continue;
            }
        }

        for (auto *header : node->headers)
        {
            // If we have a loop header already associated with this block, treat that as our idom.
            if (header->forward_post_visit_order > idom->forward_post_visit_order)
                idom = header;
        }

        // Similar, but also check if we have associated ladder blocks with the idom.
        if (!idom->pred_back_edge)
        {
            auto *inner_loop_header = get_innermost_loop_header_for(idom);
            if (inner_loop_header && inner_loop_header->loop_ladder_block == node)
                idom = const_cast<CFGNode *>(inner_loop_header);
        }

        if (idom->merge == MergeType::None || idom->merge == MergeType::Selection)
        {
            // We just found a switch block which we have already handled.
            if (idom->ir.terminator.type == Terminator::Type::Switch)
                continue;

            // If the idom is already a selection construct, this must mean
            // we have some form of breaking construct inside this inner construct.
            // This fooled find_selection_merges() into thinking we had a selection merge target at the break target.
            // Fix this up here, where we rewrite the outer construct as a fixed loop instead.
            if (idom->merge == MergeType::Selection)
            {
                if (pass == 0)
                {
                    assert(idom->selection_merge_block);
                    // If we turn the outer selection construct into a loop,
                    // we remove the possibility to break further out (without adding ladders like we do for loops).
                    // To make this work, we must ensure that the new merge block post-dominates the loop and selection merge.
                    auto *merge_candidate = CFGNode::find_common_post_dominator(idom->selection_merge_block, idom);
                    if (!merge_candidate || merge_candidate == idom->selection_merge_block)
                    {
                        idom->loop_merge_block = idom->selection_merge_block;
                    }
                    else
                    {
                        // Make sure we split merge scopes. Pretend we have a true loop.
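                        // The old selection merge block becomes the ladder,
                        // and the common post-dominator becomes the actual loop merge.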
                        idom->loop_ladder_block = idom->selection_merge_block;
                        idom->loop_merge_block = merge_candidate;
                    }

                    idom->loop_merge_block->add_unique_header(idom);
                    idom->merge = MergeType::Loop;
                    idom->selection_merge_block = nullptr;
                    idom->freeze_structured_analysis = true;
                    idom = create_helper_succ_block(idom);
                }
                else
                    LOGW("Mismatch headers in pass 1 ... ?\n");
            }

            // If we're in pass 1 and opt for a selection merge, we had better make sure that we can
            // actually use this as a merge block.
            // If we have more than 2 preds, there is no way this is not a break block merge.
            // It is not a switch statement and selections spawn two new scopes.
            // We should have resolved this in pass 0, but it can slip through the cracks if there
            // are multiple interleaving merge scopes in play.
            bool force_loop = pass == 1 && node->num_forward_preds() > 2 && idom->merge == MergeType::None;

            if (force_loop)
            {
                idom->merge = MergeType::Loop;
                node->add_unique_header(idom);
                idom->loop_merge_block = node;
                idom->freeze_structured_analysis = true;
            }
            else
            {
                idom->merge = MergeType::Selection;
                node->add_unique_header(idom);
                assert(node);
                idom->selection_merge_block = node;
                //LOGI("Selection merge: %p (%s) -> %p (%s)\n", static_cast<const void *>(idom), idom->name.c_str(),
                //     static_cast<const void *>(node), node->name.c_str());
            }
        }
        else if (idom->merge == MergeType::Loop)
        {
            if (pass == 0)
            {
                if (idom->loop_merge_block == node && idom->loop_ladder_block)
                {
                    // We need to create an outer shell for this header since we need to ladder break to this node.
                    auto *loop = create_helper_pred_block(idom);
                    loop->merge = MergeType::Loop;
                    loop->loop_merge_block = node;
                    loop->freeze_structured_analysis = true;
                    node->add_unique_header(loop);
                    //LOGI("Loop merge: %p (%s) -> %p (%s)\n", static_cast<const void *>(loop), loop->name.c_str(),
                    //     static_cast<const void *>(node), node->name.c_str());
                }
                else if (idom->loop_merge_block != node && idom->loop_ladder_block != node)
                {
                    auto *selection_idom = create_helper_succ_block(idom);
                    // If we split the loop header into the loop header -> selection merge header,
                    // then we can merge into a continue block for example.
                    selection_idom->merge = MergeType::Selection;
                    selection_idom->selection_merge_block = node;
                    node->add_unique_header(selection_idom);
                    //LOGI("Selection merge: %p (%s) -> %p (%s)\n", static_cast<const void *>(selection_idom),
                    //     selection_idom->name.c_str(), static_cast<const void *>(node), node->name.c_str());
                }
            }
        }
        else
        {
            // We are hosed. There is no obvious way to merge execution here.
            // This might be okay.
            LOGW("Cannot merge execution for node %p (%s).\n", static_cast<const void *>(node), node->name.c_str());
        }
    }
}

const CFGNode *CFGStructurizer::get_innermost_loop_header_for(const CFGNode *header, const CFGNode *other) const
{
    auto *node = other;
    while (header != other)
    {
        // Entry block case.
        if (other->pred.empty())
            break;

        // Found a loop header. This better be the one.
        // Detect false positive if back-edge can reach the node, this means we just skip over
        // the loop. We want to detect loops in a structured sense.
        // Breaking constructs should still detect the loop header as we'd expect.
        if (other->pred_back_edge &&
            (other->pred_back_edge == node || !query_reachability(*other->pred_back_edge, *node)))
            break;

        assert(other->immediate_dominator);
        other = other->immediate_dominator;
    }

    return other;
}

const CFGNode *CFGStructurizer::get_innermost_loop_header_for(const CFGNode *other) const
{
    return get_innermost_loop_header_for(entry_block, other);
}

bool CFGStructurizer::loop_exit_supports_infinite_loop(const CFGNode *header, const CFGNode *loop_exit) const
{
    auto *inner_header = get_innermost_loop_header_for(header, loop_exit);

    // A loop exit can exit out to an outer scope such that inner_header dominates the header.
    // If there is no inner loop we can transform the loop exit into a merge block quite easily
    // and avoid the infinite loop.
    if (inner_header->dominates(header))
        return false;

    // We have a candidate. If the candidate dominates all reachable exits, there is never a need to merge later.
    return loop_exit->dominates_all_reachable_exits();
}

CFGStructurizer::LoopExitType CFGStructurizer::get_loop_exit_type(const CFGNode &header, const CFGNode &node) const
{
    // If there exists an inner loop which dominates this exit, we treat it as an inner loop exit.
    const CFGNode *innermost_loop_header = get_innermost_loop_header_for(&header, &node);
    bool is_innermost_loop_header = &header == innermost_loop_header;

    // If a back-edge can reach this node, it's not really an exit, but an Escape.
    // Exits must never branch "out" of the loop.
    if (header.dominates(&node) &&
        (!header.pred_back_edge || !query_reachability(*header.pred_back_edge, node)) &&
        node.dominates_all_reachable_exits())
    {
        if (is_innermost_loop_header)
            return LoopExitType::Exit;
        else
            return LoopExitType::InnerLoopExit;
    }

    if (header.dominates(&node))
    {
        if (is_innermost_loop_header)
        {
            // Even if we dominate node, we might not be able to merge to it.
            if (!header.can_loop_merge_to(&node))
            {
                // This is an escape we dominate, but this could also be a case where we break
                // to a continue construct in the outer loop which is not reachable through back traversal.
                // This will confuse loop analysis, since this kind of double continue will not resolve properly.
                // In this case we need to rendezvous at this block with a ladder to avoid
                // double-continue.
                auto *outer_infinite_loop =
                    get_innermost_loop_header_for(entry_block, innermost_loop_header->immediate_dominator);
                if (outer_infinite_loop && outer_infinite_loop->pred_back_edge &&
                    outer_infinite_loop->pred_back_edge->succ.empty() &&
                    outer_infinite_loop->pred_back_edge->post_dominates(&node))
                {
                    return LoopExitType::MergeToInfiniteLoop;
                }
                else
                    return LoopExitType::Escape;
            }

            return LoopExitType::Merge;
        }
        else
        {
            // Try to detect if this is a degenerate inner loop merge.
            // If the inner loop header is the only way to exit the loop construct,
            // the loop exit block is a false exit.
            // This is the case if the candidate must pass through the back edge, and the back edge can only branch to header.
            // In this case, the loop will not be visible through back-propagation, but it is definitely part of the loop construct.
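            // Only a plain Branch back edge guarantees the continue block cannot break on its own,
            // which is what makes the false-positive test below (post-dominator == back edge) meaningful.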
            if (!innermost_loop_header->pred_back_edge ||
                innermost_loop_header->pred_back_edge->ir.terminator.type != Terminator::Type::Branch)
                return LoopExitType::InnerLoopMerge;

            auto *post = find_common_post_dominator({ const_cast<CFGNode *>(&node),
                                                      innermost_loop_header->pred_back_edge });
            if (post == innermost_loop_header->pred_back_edge)
                return LoopExitType::InnerLoopFalsePositive;
            else
                return LoopExitType::InnerLoopMerge;
        }
    }
    else
        return LoopExitType::Escape;
}

CFGNode *CFGStructurizer::create_helper_pred_block(CFGNode *node)
{
    auto *pred_node = pool.create_node();
    pred_node->name = node->name + ".pred";

    // Fixup visit order later.
    pred_node->forward_post_visit_order = node->forward_post_visit_order;
    pred_node->backward_post_visit_order = node->backward_post_visit_order;

    std::swap(pred_node->pred, node->pred);

    for (auto *header : node->headers)
        header->fixup_merge_info_after_branch_rewrite(node, pred_node);
    node->headers.clear();

    // We're replacing entry block.
    if (node == node->immediate_dominator)
        pred_node->immediate_dominator = pred_node;
    else
        pred_node->immediate_dominator = node->immediate_dominator;

    pred_node->immediate_post_dominator = node;
    node->immediate_dominator = pred_node;

    retarget_pred_from(pred_node, node);

    pred_node->add_branch(node);

    if (node == entry_block)
        entry_block = pred_node;

    pred_node->ir.terminator.type = Terminator::Type::Branch;
    pred_node->ir.terminator.direct_block = node;

    return pred_node;
}

void CFGStructurizer::retarget_pred_from(CFGNode *new_node, CFGNode *old_succ)
{
    for (auto *p : new_node->pred)
    {
        for (auto &s : p->succ)
            if (s == old_succ)
                s = new_node;

        auto &p_term = p->ir.terminator;
        if (p_term.direct_block == old_succ)
            p_term.direct_block = new_node;
        if (p_term.true_block == old_succ)
            p_term.true_block = new_node;
        if (p_term.false_block == old_succ)
            p_term.false_block = new_node;

        for (auto &c : p_term.cases)
            if (c.node == old_succ)
                c.node = new_node;
    }

    // Do not swap back edges.

    // Retarget immediate post dominators.
    for (auto *n : forward_post_visit_order)
        if (n->immediate_post_dominator == old_succ)
            n->immediate_post_dominator = new_node;
}

void CFGStructurizer::retarget_succ_from(CFGNode *new_node, CFGNode *old_pred)
{
    for (auto *s : new_node->succ)
        for (auto &p : s->pred)
            if (p == old_pred)
                p = new_node;

    for (auto *node : forward_post_visit_order)
    {
        if (node != old_pred)
        {
            // Don't override immediate dominator for entry block.
            if (node->immediate_dominator == old_pred)
                node->immediate_dominator = new_node;
        }
    }

    new_node->immediate_dominator = old_pred;

    // Do not swap back edges.
}

CFGNode *CFGStructurizer::create_helper_succ_block(CFGNode *node)
{
    auto *succ_node = pool.create_node();
    succ_node->name = node->name + ".succ";

    // Fixup visit order later.
    succ_node->forward_post_visit_order = node->forward_post_visit_order;
    succ_node->backward_post_visit_order = node->backward_post_visit_order;

    std::swap(succ_node->succ, node->succ);
    // Do not swap back edges, only forward edges.

    succ_node->immediate_post_dominator = node->immediate_post_dominator;
    node->immediate_post_dominator = succ_node;

    succ_node->ir.terminator = node->ir.terminator;
    node->ir.terminator.type = Terminator::Type::Branch;
    node->ir.terminator.direct_block = succ_node;

    // Inherit selection construct from parent since we're taking over any selection.
    if (succ_node->ir.terminator.type == Terminator::Type::Condition)
        succ_node->ir.merge_info.selection_control_mask = node->ir.merge_info.selection_control_mask;

    retarget_succ_from(succ_node, node);
    node->add_branch(succ_node);
    return succ_node;
}

CFGNode *CFGStructurizer::find_common_post_dominator(const Vector<CFGNode *> &candidates)
{
    if (candidates.empty())
        return nullptr;
    else if (candidates.size() == 1)
        return candidates.front();

    CFGNode *common_post = CFGNode::find_common_post_dominator(candidates[0], candidates[1]);
    for (size_t i = 2; i < candidates.size(); i++)
        common_post = CFGNode::find_common_post_dominator(common_post, candidates[i]);

    return common_post != common_post->immediate_post_dominator ? common_post : nullptr;
}

CFGNode *CFGStructurizer::find_break_target_for_selection_construct(CFGNode *idom, CFGNode *merge)
{
    Vector<CFGNode *> new_visit_queue;
    UnorderedSet<CFGNode *> visited;
    Vector<CFGNode *> visit_queue;
    Vector<CFGNode *> candidates;
    visit_queue.push_back(idom);

    do
    {
        for (auto *n : visit_queue)
        {
            if (visited.count(n))
                continue;
            visited.insert(n);

            if (query_reachability(*merge, *n))
                continue;

            if (query_reachability(*n, *merge))
            {
                for (auto *succ : n->succ)
                    new_visit_queue.push_back(succ);
            }
            else
            {
                // Cannot merge into a loop construct.
                // Merging towards an outer loop construct would probably lead to weird results,
                // but allow it here.
                auto *inner = get_innermost_loop_header_for(n);
                if (inner != entry_block && query_reachability(*idom, *inner))
                    continue;

                // The breaking path might be vestigial.
                // I.e., it might just be exiting directly without dominating anything.
                // Have to detect this false positive, since it's not really a break, just early return.
                // If we hit a dominance frontier, allow it as a candidate since it cannot be early return within
                // the construct.
                if (!n->dominates_all_reachable_exits() || !idom->dominates(n))
                    candidates.push_back(n);
            }
        }

        visit_queue = new_visit_queue;
        new_visit_queue.clear();
    } while (!visit_queue.empty());

    if (candidates.empty())
        return nullptr;
    else
        return find_common_post_dominator(candidates);
}

CFGNode *CFGStructurizer::find_common_post_dominator_with_ignored_break(Vector<CFGNode *> candidates,
                                                                        const CFGNode *ignored_node)
{
    if (candidates.empty())
        return nullptr;

    Vector<CFGNode *> next_nodes;
    const auto add_unique_next_node = [&](CFGNode *node) {
        if (node != ignored_node)
            if (std::find(next_nodes.begin(), next_nodes.end(), node) == next_nodes.end())
                next_nodes.push_back(node);
    };

    while (candidates.size() != 1)
    {
        // Sort candidates by post visit order.
        std::sort(candidates.begin(), candidates.end(),
                  [](const CFGNode *a, const CFGNode *b) {
                      return a->forward_post_visit_order > b->forward_post_visit_order;
                  });

        // We reached exit without merging execution, there is no common post dominator.
        // A continue block which only branches back to header is conveniently ignored here.
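        // After the descending sort, candidates.front() has the largest forward post-visit order.
        // Replace it with its successors and iterate until a single candidate remains.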
        if (candidates.front()->succ.empty() && !candidates.front()->succ_back_edge)
            return nullptr;

        for (auto *succ : candidates.front()->succ)
            add_unique_next_node(succ);
        for (auto itr = candidates.begin() + 1; itr != candidates.end(); ++itr)
            add_unique_next_node(*itr);

        candidates.clear();
        std::swap(candidates, next_nodes);
    }

    if (candidates.empty())
        return nullptr;

    return candidates.front();
}

void CFGStructurizer::rewrite_ladder_conditional_branch_from_incoming_blocks(
    CFGNode *ladder, CFGNode *true_block, CFGNode *false_block,
    const std::function<bool(const CFGNode *)> &path_cb, const String &name)
{
    ladder->add_branch(true_block);
    ladder->add_branch(false_block);
    ladder->ir.terminator.type = Terminator::Type::Condition;
    ladder->ir.terminator.conditional_id = module.allocate_id();
    ladder->ir.terminator.true_block = true_block;
    ladder->ir.terminator.false_block = false_block;
    ladder->ir.terminator.direct_block = nullptr;

    PHI phi;
    phi.id = ladder->ir.terminator.conditional_id;
    phi.type_id = module.get_builder().makeBoolType();
    module.get_builder().addName(phi.id, name.c_str());

    for (auto *pred : ladder->pred)
    {
        IncomingValue incoming = {};
        incoming.block = pred;
        incoming.id = module.get_builder().makeBoolConstant(path_cb(pred));
        phi.incoming.push_back(incoming);
    }

    ladder->ir.phi.push_back(std::move(phi));
}

CFGNode *CFGStructurizer::transpose_code_path_through_ladder_block(CFGNode *header, CFGNode *merge, CFGNode *path)
{
    assert(header->dominates(merge) && header->dominates(path));
    assert(query_reachability(*merge, *path));
    assert(!merge->dominates(path));
    assert(header != merge);
    assert(merge != path);
    assert(header != path);

    // Rewrite the merge block into merge.pred where merge.pred will branch to either merge or path.
    auto *ladder = create_ladder_block(header, merge, ".transpose");

    UnorderedSet<const CFGNode *> normal_preds;
    for (auto *p : ladder->pred)
        normal_preds.insert(p);

    traverse_dominated_blocks_and_rewrite_branch(header, path, ladder);

    rewrite_ladder_conditional_branch_from_incoming_blocks(
        ladder, path, merge,
        [&](const CFGNode *n) { return normal_preds.count(n) == 0; },
        String("transpose_ladder_phi_") + ladder->name);

    return ladder;
}

void CFGStructurizer::rewrite_transposed_loop_outer(CFGNode *node, CFGNode *impossible_merge_target,
                                                    const LoopMergeAnalysis &analysis)
{
    auto impossible_preds = impossible_merge_target->pred;
    auto *replaced_merge_block = create_helper_pred_block(analysis.dominated_merge);
    replaced_merge_block->name = analysis.dominated_merge->name + ".transposed-merge-outer";

    for (auto *pred : impossible_preds)
        if (!query_reachability(*analysis.dominated_merge, *pred))
            pred->retarget_branch(impossible_merge_target, replaced_merge_block);

    rewrite_ladder_conditional_branch_from_incoming_blocks(
        replaced_merge_block, impossible_merge_target, analysis.dominated_merge,
        [&](const CFGNode *n) {
            return std::find(impossible_preds.begin(), impossible_preds.end(), n) != impossible_preds.end();
        },
        String("transposed_selector_") + node->name);
}

void CFGStructurizer::rewrite_transposed_loop_inner(CFGNode *node, CFGNode *impossible_merge_target,
                                                    const LoopMergeAnalysis &analysis)
{
    // Rewrite the control flow from the inside out through a transposition.
    // The common break target will become the merge block instead.
    // The continue will break out to the transposed merge instead.
    // In the ladder, we will enter a breaking path which branches out to loop_ladder.
    // We just arbitrarily call this "inner", since I don't think it has a formal name.
    // In this case, dominated merge cannot reach impossible merge target.
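    // Two ladder nodes are created below: a selection ladder which takes over the preds of
    // dominated_merge, and a break ladder which forwards the breaking path to the impossible target.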
    auto *merge = analysis.merge;
    auto *dominated_merge = analysis.dominated_merge;

    auto *ladder_break = pool.create_node();
    ladder_break->name = node->name + ".transposed-merge-inner.break";
    ladder_break->ir.terminator.type = Terminator::Type::Branch;
    ladder_break->ir.terminator.direct_block = impossible_merge_target;
    ladder_break->immediate_post_dominator = impossible_merge_target;
    ladder_break->forward_post_visit_order = impossible_merge_target->forward_post_visit_order;
    ladder_break->backward_post_visit_order = impossible_merge_target->backward_post_visit_order;

    auto *ladder_selection = pool.create_node();
    ladder_selection->name = node->name + ".transposed-merge-inner";
    ladder_selection->forward_post_visit_order = impossible_merge_target->forward_post_visit_order;
    ladder_selection->backward_post_visit_order = impossible_merge_target->backward_post_visit_order;
    ladder_selection->immediate_post_dominator = merge;
    ladder_break->immediate_dominator = ladder_selection;

    auto ladder_preds = dominated_merge->pred;

    ladder_selection->add_branch(ladder_break);
    ladder_selection->add_branch(dominated_merge);
    traverse_dominated_blocks_and_rewrite_branch(node, impossible_merge_target, ladder_selection);
    ladder_selection->recompute_immediate_dominator();
    ladder_break->add_branch(impossible_merge_target);

    // Branches from these blocks should be rewritten to target transposed-merge.
    for (auto *ladder_pred : ladder_preds)
        ladder_pred->retarget_branch(dominated_merge, ladder_selection);

    rewrite_ladder_conditional_branch_from_incoming_blocks(
        ladder_selection, dominated_merge, ladder_break,
        [&](const CFGNode *n) {
            return std::find(ladder_preds.begin(), ladder_preds.end(), n) != ladder_preds.end();
        },
        String("transposed_selector_") + node->name);
}

bool CFGStructurizer::rewrite_transposed_loops()
{
    bool did_rewrite = false;

    for (auto index = forward_post_visit_order.size(); index && !did_rewrite; index--)
    {
        // Visit in reverse order so we resolve outer loops first,
        // this lets us detect ladder-breaking loops.
        auto *node = forward_post_visit_order[index - 1];
        if (node->freeze_structured_analysis && node->merge == MergeType::Loop)
            continue;
        if (!node->has_pred_back_edges())
            continue;

        auto result = analyze_loop(node);
        auto merge_result = analyze_loop_merge(node, result);
        auto *merge = merge_result.merge;
        auto *dominated_merge = merge_result.dominated_merge;

        if (!merge || !dominated_merge)
            continue;

        // We might have a horribly complex scenario where a loop breaks, but it breaks to an outer scope
        // which is not consistent with the merge block, i.e. we need structured control flow to resolve properly
        // before we can break. This is ... problematic.
        // We call this an "inner" transposed loop here since merge block cannot reach this block.

        // Always resolve infinite continue ladders. This is where we break to
        // an outer infinite loop. We must resolve the scopes by making this ladder the
        // merge point, then we can break further.
        CFGNode *impossible_merge_target = merge_result.infinite_continue_ladder;

        if (!impossible_merge_target && !result.non_dominated_exit.empty())
        {
            auto *common_break_target = find_common_post_dominator(result.non_dominated_exit);
            if (common_break_target && common_break_target != merge &&
                !query_reachability(*dominated_merge, *common_break_target) &&
                !query_reachability(*common_break_target, *dominated_merge))
            {
                // Another weird scenario is where we dominate the outer continue,
                // which would escape the DF analysis, but that is strong evidence we need to transpose.
                // A normal break would never dominate anything like that.
                if (common_break_target->reaches_domination_frontier_before_merge(merge) ||
                    common_break_target->dominates_outer_continue(node))
                {
                    impossible_merge_target = common_break_target;
                }
            }
        }

        if (!impossible_merge_target)
        {
            // We might have a different scenario where there are multiple breaks, but they break out to different
            // scopes. One of these might require a similar impossible merge.
            // Common post dominator analysis would not catch this.
            // What we're looking for is a node which:
            // - Is dominated by loop header (or is in the domination frontier of loop header)
            // - Is reachable, but not dominated by dominated_merge.
            // - Post dominates one of the non_dominated_exits.
            // This means the node is in a twilight zone where the node is kinda in the loop construct, but kinda not.
            // Structured rules for a loop state that a node is in the construct if:
            // - It is dominated by loop header
            // - Not dominated by merge block.
            // In a sense, the merge block ends up branching back into its own loop, which is irreducible, kinda ...
            // We call this an "outer" transposed loop here since merge block *can* reach this block.
            for (size_t i = 0, n = result.non_dominated_exit.size(); i < n && !impossible_merge_target; i++)
            {
                auto *candidate = result.non_dominated_exit[i];
                while (candidate != merge && candidate != dominated_merge)
                {
                    if (query_reachability(*dominated_merge, *candidate) && !dominated_merge->dominates(candidate))
                    {
                        // Merge block attempts to branch back into its own loop construct (yikes).
                        impossible_merge_target = candidate;

                        // If we don't dominate the merge target, i.e. we're in the domination frontier,
                        // we have to synthesize a fake impossible merge target first since the rewrite
                        // algorithm depends on node dominating the merge target.
                        if (!node->dominates(impossible_merge_target))
                            impossible_merge_target = create_ladder_block(node, impossible_merge_target, ".impossible-ladder");
                        break;
                    }
                    else if (node->dominates(candidate) && candidate != candidate->immediate_post_dominator)
                    {
                        candidate = candidate->immediate_post_dominator;
                    }
                    else
                    {
                        // We will be able to select a candidate in the domination frontier once.
                        // If we failed to find a candidate in the domination frontier, we're done checking.
                        break;
                    }
                }
            }
        }

        if (impossible_merge_target)
        {
            if (query_reachability(*dominated_merge, *impossible_merge_target))
                rewrite_transposed_loop_outer(node, impossible_merge_target, merge_result);
            else
                rewrite_transposed_loop_inner(node, impossible_merge_target, merge_result);

            // We have obliterated the existing control flow through transposition,
            // and any domination or post-domination analysis will break.
            // Re-traverse the CFG and try again.
            // Continue until we have eliminated all impossible loops (should be extremely rare).
            did_rewrite = true;
        }
        else if (!result.non_dominated_exit.empty() && dominated_merge->dominance_frontier.size() >= 2)
        {
            // If we cannot find the impossible merge target through post-domination analysis,
            // we might find it through domination frontier analysis.
            // If all loop exits and the loop header share a domination frontier,
            // it's probably our candidate.
            // Only apply this analysis to cases where a loop has at least two dominance frontiers,
            // which also don't have a dominance relationship with each other.
            // This is evidence that the loop is attempting to break to multiple different scopes.
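            // Sort a copy of the frontier so the pairwise dominance check below
            // sees a deterministic order.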
            auto frontier = dominated_merge->dominance_frontier;
            std::stable_sort(frontier.begin(), frontier.end(),
                             [](const CFGNode *a, const CFGNode *b) {
                                 return a->forward_post_visit_order > b->forward_post_visit_order;
                             });

            bool frontier_has_dominance_relationship = false;
            for (size_t i = 0, n = frontier.size(); i < n && !frontier_has_dominance_relationship; i++)
                for (size_t j = i + 1; j < n && !frontier_has_dominance_relationship; j++)
                    if (frontier[i]->dominates(frontier[j]))
                        frontier_has_dominance_relationship = true;

            if (frontier_has_dominance_relationship)
                continue;

            for (auto *candidate : dominated_merge->dominance_frontier)
            {
                bool all_frontier = std::find(node->dominance_frontier.begin(), node->dominance_frontier.end(),
                                              candidate) != node->dominance_frontier.end();

                if (all_frontier)
                {
                    for (auto *non_dominated : result.non_dominated_exit)
                    {
                        if (!node->dominates(non_dominated))
                        {
                            all_frontier = false;
                            break;
                        }

                        if (std::find(non_dominated->dominance_frontier.begin(),
                                      non_dominated->dominance_frontier.end(),
                                      candidate) == non_dominated->dominance_frontier.end())
                        {
                            all_frontier = false;
                            break;
                        }
                    }
                }

                if (all_frontier)
                {
                    if (!impossible_merge_target ||
                        candidate->forward_post_visit_order > impossible_merge_target->forward_post_visit_order)
                    {
                        impossible_merge_target = candidate;
                    }
                }
                else
                {
                    impossible_merge_target = nullptr;
                    break;
                }
            }

            if (impossible_merge_target)
            {
                auto constructs = result.non_dominated_exit;
                constructs.push_back(dominated_merge);
                collect_and_dispatch_control_flow(node, dominated_merge, constructs, false, false);
                did_rewrite = true;
            }
        }
    }

    if (did_rewrite)
        recompute_cfg();

    return did_rewrite;
}

CFGStructurizer::LoopAnalysis CFGStructurizer::analyze_loop(CFGNode *node) const
{
    LoopAnalysis result;

    // Now, we need to figure out which blocks belong in the loop construct.
    // A natural loop contains any block which is dominated by the loop header
    // and from which control flow passes to one of the back edges.
    // Unfortunately, it can be ambiguous which block is the merge block for a loop.
    // Ideally, there is a unique block which is the loop exit block, but if there are multiple breaks
    // there are multiple blocks which are not part of the loop construct.

    LoopBacktracer tracer;
    auto *pred = node->pred_back_edge;

    // Back-trace from here.
    // The CFG is reducible, so node must dominate pred.
    // Since node dominates pred, there is no pred chain we can follow without
    // eventually hitting node, and we'll stop traversal there.
    // All nodes which are touched during this traversal must be part of the loop construct.
    tracer.trace_to_parent(node, pred);

    LoopMergeTracer merge_tracer(tracer);
    merge_tracer.trace_from_parent(node);

    for (auto *loop_exit : merge_tracer.loop_exits)
    {
        auto exit_type = get_loop_exit_type(*node, *loop_exit);
        switch (exit_type)
        {
        case LoopExitType::Exit:
            result.direct_exits.push_back(loop_exit);
            break;

        case LoopExitType::InnerLoopExit:
            // It's not an exit for us, but the inner loop.
            result.inner_direct_exits.push_back(loop_exit);
            break;

        case LoopExitType::Merge:
            result.dominated_exit.push_back(loop_exit);
            break;

        case LoopExitType::InnerLoopMerge:
            result.inner_dominated_exit.push_back(loop_exit);
            break;

        case LoopExitType::InnerLoopFalsePositive:
            // In this case, the inner loop can only exit at the loop header,
            // and thus post-dominance analysis will always fail.
            // Ignore this case as it's a false exit.
            break;

        case LoopExitType::Escape:
            result.non_dominated_exit.push_back(loop_exit);
            break;

        case LoopExitType::MergeToInfiniteLoop:
            result.dominated_continue_exit.push_back(loop_exit);
            break;
        }
    }

    // A dominated continue exit should not be considered as such if it can reach other "normal" exits.
    // In this case, it's just a break.
    auto continue_itr = result.dominated_continue_exit.begin();
    while (continue_itr != result.dominated_continue_exit.end())
    {
        auto *candidate = *continue_itr;
        bool found_candidate = false;

        for (auto *dominated : result.dominated_exit)
        {
            if (query_reachability(*candidate, *dominated))
            {
                result.non_dominated_exit.push_back(candidate);
                continue_itr = result.dominated_continue_exit.erase(continue_itr);
                found_candidate = true;
                break;
            }
        }

        if (!found_candidate)
        {
            for (auto *non_dominated : result.non_dominated_exit)
            {
                if (query_reachability(*candidate, *non_dominated))
                {
                    result.non_dominated_exit.push_back(candidate);
                    continue_itr = result.dominated_continue_exit.erase(continue_itr);
                    found_candidate = true;
                    break;
                }
            }
        }

        if (!found_candidate)
            ++continue_itr;
    }

    if (result.dominated_continue_exit.size() > 1)
    {
        // If we have multiple continue exit candidates, they better merge into a single clean candidate that we
        // still dominate, otherwise, ignore this case and treat them all as normal Escape nodes.
        auto *common = find_common_post_dominator(result.dominated_continue_exit);
        if (common && node->dominates(common))
        {
            result.dominated_continue_exit.clear();
            result.dominated_continue_exit.push_back(common);
        }
        else
        {
            result.non_dominated_exit.insert(result.non_dominated_exit.end(),
                                             result.dominated_continue_exit.begin(),
                                             result.dominated_continue_exit.end());
            result.dominated_continue_exit.clear();
        }
    }

    // If the only merge candidates we have are inner dominated, treat them as true dominated exits.
    if (result.dominated_exit.empty() && !result.inner_dominated_exit.empty())
        std::swap(result.dominated_exit, result.inner_dominated_exit);

    // If there are no direct exits, treat inner direct exits as direct exits.
    if (result.direct_exits.empty())
        std::swap(result.direct_exits, result.inner_direct_exits);

    // A direct exit can be considered a dominated exit if there are no better candidates.
    if (result.dominated_exit.empty() && !result.direct_exits.empty())
        std::swap(result.dominated_exit, result.direct_exits);

    // If we only have one direct exit, consider it our merge block.
    // Pick either Merge or Escape.
    if (result.direct_exits.size() == 1 && result.dominated_exit.empty() && result.non_dominated_exit.empty())
    {
        if (node->dominates(result.direct_exits.front()))
            std::swap(result.dominated_exit, result.direct_exits);
        else
            std::swap(result.non_dominated_exit, result.direct_exits);
    }

    if (result.dominated_exit.size() >= 2)
    {
        // Try to see if we can reduce the number of merge blocks to just 1.
        // This is relevant if we have various "clean" break blocks.
        auto *post_dominator = find_common_post_dominator(result.dominated_exit);
        if (std::find(result.dominated_exit.begin(), result.dominated_exit.end(), post_dominator) !=
            result.dominated_exit.end())
        {
            result.dominated_exit.clear();
            result.dominated_exit.push_back(post_dominator);
        }
    }

    return result;
}

CFGStructurizer::LoopMergeAnalysis CFGStructurizer::analyze_loop_merge(CFGNode *node, const LoopAnalysis &analysis)
{
    // We have multiple blocks which are merge candidates. We need to figure out where execution reconvenes.
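    // "merge" is the common post-dominator of every exit candidate, while "dominated_merge" is the
    // ladder target which the loop header actually dominates; they differ when breaks escape the loop.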
	Vector<CFGNode *> merges;
	merges.reserve(analysis.inner_dominated_exit.size() + analysis.dominated_exit.size() +
	               analysis.non_dominated_exit.size());
	merges.insert(merges.end(), analysis.inner_dominated_exit.begin(), analysis.inner_dominated_exit.end());
	merges.insert(merges.end(), analysis.dominated_exit.begin(), analysis.dominated_exit.end());
	merges.insert(merges.end(), analysis.non_dominated_exit.begin(), analysis.non_dominated_exit.end());
	CFGNode *merge = CFGStructurizer::find_common_post_dominator(merges);

	CFGNode *dominated_merge = nullptr;

	// Try to find the sensible target first.
	// If one of our merge blocks is the successor of the continue block,
	// this is a prime candidate for a ladder block.
	if (node->pred_back_edge && node->pred_back_edge->succ.size() == 1 &&
	    std::find(analysis.dominated_exit.begin(), analysis.dominated_exit.end(),
	              node->pred_back_edge->succ.front()) != analysis.dominated_exit.end())
	{
		dominated_merge = node->pred_back_edge->succ.front();
	}
	else if (merge && !node->dominates(merge) && analysis.dominated_exit.size() > 1)
	{
		// Now, we might have Merge blocks which end up escaping out of the loop construct.
		// We might have to remove candidates which end up being break blocks after all.
		Vector<CFGNode *> non_breaking_exits;
		non_breaking_exits.reserve(analysis.dominated_exit.size());
		for (auto *exit : analysis.dominated_exit)
			if (!control_flow_is_escaping(exit, merge))
				non_breaking_exits.push_back(exit);

		if (!non_breaking_exits.empty())
			dominated_merge = CFGStructurizer::find_common_post_dominator(non_breaking_exits);

		if (!dominated_merge)
		{
			// If we get here, we likely have some questionable tie-break situation.
			// One possible case is an infinite loop where one path does a multi-level break,
			// and other paths branch to outer loop's continue. We'll want to only look at dominated exits
			// with the smallest break scope and try to find a common post dominator.
			auto *innermost_header = get_innermost_loop_header_for(node->immediate_dominator);
			Vector<CFGNode *> continue_exits;
			if (innermost_header && innermost_header->pred_back_edge)
				for (auto *exit : analysis.dominated_exit)
					if (query_reachability(*exit, *innermost_header->pred_back_edge))
						continue_exits.push_back(exit);

			if (!continue_exits.empty())
				dominated_merge = CFGStructurizer::find_common_post_dominator(continue_exits);
		}
	}
	else
	{
		dominated_merge = CFGStructurizer::find_common_post_dominator(analysis.dominated_exit);
	}

	if (!dominated_merge)
	{
		LOGW("There is no candidate for ladder merging.\n");
	}

	if (dominated_merge && !node->dominates(dominated_merge))
	{
		LOGW("We don't dominate the merge target ...\n");
		dominated_merge = nullptr;
	}

	LoopMergeAnalysis merge_result = {};
	merge_result.merge = merge;
	merge_result.weak_merge = merge;
	merge_result.dominated_merge = dominated_merge;

	if (!merge)
	{
		// Try to find a candidate merge point which ignores any early exits through common post domination frontier
		// analysis.
		Vector<CFGNode *> frontiers;
		for (auto *m : merges)
			frontiers.insert(frontiers.end(), m->dominance_frontier.begin(), m->dominance_frontier.end());

		// Find the innermost frontier that satisfies the requirements.
		std::stable_sort(frontiers.begin(), frontiers.end(), [](const CFGNode *a, const CFGNode *b) {
			return a->forward_post_visit_order > b->forward_post_visit_order;
		});
		frontiers.erase(std::unique(frontiers.begin(), frontiers.end()), frontiers.end());

		for (auto *front : frontiers)
		{
			// All merge nodes must reach the candidate for it to be considered a proper merge.
auto itr = std::find_if(merges.begin(), merges.end(), [&](const CFGNode *c) { return !query_reachability(*c, *front); }); if (itr == merges.end()) { merge_result.weak_merge = front; break; } } } if (!analysis.dominated_continue_exit.empty()) { assert(analysis.dominated_continue_exit.size() == 1); merge_result.infinite_continue_ladder = analysis.dominated_continue_exit.front(); } return merge_result; } void CFGStructurizer::collect_and_dispatch_control_flow_from_anchor( CFGNode *anchor, const Vector &constructs) { auto &builder = module.get_builder(); // If we have an anchor, it should collect all control flow, maybe dispatch itself, then dispatch to the constructs. // It must be a conditional branch, since it's too much of a mess to deal with switch. assert(anchor->ir.terminator.type == Terminator::Type::Condition); assert(constructs.size() == 2); assert(constructs[0]->post_dominates(anchor->ir.terminator.true_block) || constructs[0]->post_dominates(anchor->ir.terminator.false_block)); assert(constructs[1]->post_dominates(anchor->ir.terminator.true_block) || constructs[1]->post_dominates(anchor->ir.terminator.false_block)); auto *anchor_pred = create_helper_pred_block(anchor); auto *anchor_to_construct0 = pool.create_node(); auto *anchor_to_construct1 = pool.create_node(); auto *anchor_terminator = pool.create_node(); auto *anchor_dispatcher = pool.create_node(); anchor_to_construct0->name = anchor->name + ".anchor0"; anchor_to_construct1->name = anchor->name + ".anchor1"; anchor_to_construct0->immediate_dominator = anchor; anchor_to_construct1->immediate_dominator = anchor; anchor_to_construct0->immediate_post_dominator = constructs[0]; anchor_to_construct1->immediate_post_dominator = constructs[1]; anchor_to_construct0->forward_post_visit_order = constructs[0]->forward_post_visit_order; anchor_to_construct1->forward_post_visit_order = constructs[1]->forward_post_visit_order; anchor_to_construct0->backward_post_visit_order = constructs[0]->backward_post_visit_order; anchor_to_construct1->backward_post_visit_order = constructs[1]->backward_post_visit_order; anchor_to_construct0->add_branch(anchor_terminator); anchor_to_construct1->add_branch(anchor_terminator); anchor_to_construct0->ir.terminator.type = Terminator::Type::Branch; anchor_to_construct0->ir.terminator.direct_block = anchor_terminator; anchor_to_construct1->ir.terminator.type = Terminator::Type::Branch; anchor_to_construct1->ir.terminator.direct_block = anchor_terminator; anchor_terminator->name = anchor->name + ".anchor-term"; anchor_terminator->add_branch(anchor_dispatcher); anchor_terminator->ir.terminator.type = Terminator::Type::Branch; anchor_terminator->ir.terminator.direct_block = anchor_dispatcher; anchor_dispatcher->name = anchor->name + ".anchor-dispatch"; PHI terminator_selector; terminator_selector.id = module.allocate_id(); terminator_selector.type_id = builder.makeBoolType(); terminator_selector.incoming.push_back({ anchor_to_construct0, builder.makeBoolConstant(true) }); terminator_selector.incoming.push_back({ anchor_to_construct1, builder.makeBoolConstant(false) }); traverse_dominated_blocks_and_rewrite_branch(anchor, constructs[0], anchor_to_construct0); traverse_dominated_blocks_and_rewrite_branch(anchor, constructs[1], anchor_to_construct1); size_t cutoff_normal_path = anchor_pred->pred.size(); traverse_dominated_blocks_and_rewrite_branch(constructs[0]->immediate_dominator, constructs[0], anchor_pred); size_t cutoff_path0 = anchor_pred->pred.size(); 
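	// After the final rewrite just below, anchor_pred->pred is partitioned by construction:
	//   [0, cutoff_normal_path)            : preds which branched to anchor normally,
	//   [cutoff_normal_path, cutoff_path0) : rewritten branches which targeted constructs[0],
	//   [cutoff_path0, pred.size())        : rewritten branches which targeted constructs[1].
	// The boolean PHIs below pick their constants purely from these index ranges.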
traverse_dominated_blocks_and_rewrite_branch(constructs[1]->immediate_dominator, constructs[1], anchor_pred); assert(constructs[0]->pred.empty()); assert(constructs[1]->pred.empty()); // Branch to anchor as normal if we have a pre-existing pred. PHI take_anchor_phi; take_anchor_phi.id = module.allocate_id(); take_anchor_phi.type_id = builder.makeBoolType(); for (size_t i = 0; i < cutoff_normal_path; i++) take_anchor_phi.incoming.push_back({ anchor_pred->pred[i], builder.makeBoolConstant(true) }); for (size_t i = cutoff_normal_path; i < anchor_pred->pred.size(); i++) take_anchor_phi.incoming.push_back({ anchor_pred->pred[i], builder.makeBoolConstant(false) }); anchor_pred->add_branch(anchor); anchor_pred->add_branch(anchor_dispatcher); anchor_pred->ir.terminator.type = Terminator::Type::Condition; anchor_pred->ir.terminator.true_block = anchor; anchor_pred->ir.terminator.false_block = anchor_dispatcher; anchor_pred->ir.terminator.direct_block = nullptr; anchor_pred->ir.terminator.conditional_id = take_anchor_phi.id; PHI outside_true_phi; outside_true_phi.id = module.allocate_id(); outside_true_phi.type_id = builder.makeBoolType(); for (size_t i = 0; i < cutoff_path0; i++) outside_true_phi.incoming.push_back({ anchor_pred->pred[i], builder.makeBoolConstant(true) }); for (size_t i = cutoff_path0; i < anchor_pred->pred.size(); i++) outside_true_phi.incoming.push_back({ anchor_pred->pred[i], builder.makeBoolConstant(false) }); PHI anchor_cond_phi; anchor_cond_phi.id = module.allocate_id(); anchor_cond_phi.type_id = builder.makeBoolType(); // If we took the path through anchor, use that conditional. Otherwise, use the selector between path 0 or 1. anchor_cond_phi.incoming.push_back({ anchor, terminator_selector.id }); anchor_cond_phi.incoming.push_back({ anchor_pred, outside_true_phi.id }); anchor_pred->ir.phi.push_back(std::move(take_anchor_phi)); anchor_pred->ir.phi.push_back(std::move(outside_true_phi)); anchor_terminator->ir.phi.push_back(std::move(terminator_selector)); anchor_dispatcher->ir.terminator.conditional_id = anchor_cond_phi.id; anchor_dispatcher->ir.terminator.type = Terminator::Type::Condition; anchor_dispatcher->ir.terminator.true_block = constructs[0]; anchor_dispatcher->ir.terminator.false_block = constructs[1]; anchor_dispatcher->add_branch(constructs[0]); anchor_dispatcher->add_branch(constructs[1]); anchor_dispatcher->ir.phi.push_back(std::move(anchor_cond_phi)); } void CFGStructurizer::collect_and_dispatch_control_flow( CFGNode *common_idom, CFGNode *common_pdom, const Vector &constructs, bool collect_all_code_paths_to_pdom, bool allow_crossing_branches) { assert(constructs.size() >= 2); auto &builder = module.get_builder(); bool need_default_case = false; bool plain_branch = false; size_t cutoff_index = 0; CFGNode *dispatcher; // If there is no strict dominance relationship, it's too risky to freeze a loop here, // since we may have stray breaks that will invert merge ordering, and cause issues. // Freezing control flow is important for interleaved merge patterns where we don't want to explode // the control flow ladders all over the place. bool freeze_control_flow = !common_idom->pred_back_edge && common_pdom->post_dominates(common_idom); if (freeze_control_flow) { // Also check that there are no edges that leave the scope between common_idom // and common_pdom and don't freeze if so. 
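		// "Freezing" here means common_idom is later forced into MergeType::Loop with the new
		// dispatcher as its merge block (see the freeze_control_flow handling further down),
		// so we have to be conservative about when that is legal.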
// node->forward_post_visit_order should map 1:1 to the post-visit array, // but in extreme circumstances where there have been inline cfg rewrites before recompute, // this may not be true, so be defensive. auto itr = std::find(forward_post_visit_order.begin(), forward_post_visit_order.end(), common_pdom); auto end = std::find(forward_post_visit_order.begin(), forward_post_visit_order.end(), common_idom); assert(itr != forward_post_visit_order.end()); assert(end != forward_post_visit_order.end()); const auto can_reach_any_construct = [&](const CFGNode *succ) { for (auto *construct : constructs) if (query_reachability(*succ, *construct)) return true; return false; }; const auto any_succ_escapes_constructs = [&](const CFGNode *n) { // idom is not included in the loop below, but it can branch beyond all constructs. for (auto *succ : n->succ) if (!can_reach_any_construct(succ)) return true; return false; }; const auto is_construct = [&](const CFGNode *n) { return std::find(constructs.begin(), constructs.end(), n) != constructs.end(); }; if (!collect_all_code_paths_to_pdom) { // idom is not included in the loop below, but it can branch beyond all constructs. freeze_control_flow = !any_succ_escapes_constructs(common_idom); } for (; itr != end && freeze_control_flow; ++itr) { CFGNode *node = *itr; if (!common_idom->dominates(node)) continue; if (node->succ_back_edge != nullptr && node->succ_back_edge != common_idom && query_reachability(*node->succ_back_edge, *common_idom)) { // Branches backwards. freeze_control_flow = false; } else if (!collect_all_code_paths_to_pdom && !is_construct(node) && common_idom->dominates(node) && can_reach_any_construct(node) && any_succ_escapes_constructs(node)) { // If we're using the simple collector, we merge at the constructs instead. // Make absolutely sure this is safe to merge to be checking that the dispatch point would be // a suitable merge. freeze_control_flow = false; } } } PHI phi; phi.id = module.allocate_id(); if (collect_all_code_paths_to_pdom) { // In some merge scenarios, we need to make sure we encapsulate all code into this new dispatcher. // This will become our new merge block. Incoming impossible merges will be transposed to after this new merge. dispatcher = create_helper_pred_block(common_pdom); for (auto *candidate : constructs) traverse_dominated_blocks_and_rewrite_branch(candidate, dispatcher, common_pdom); cutoff_index = dispatcher->pred.size(); // If there is no direct branch intended for node, the default case label will never be reached, // so just pilfer one of the cases as default. need_default_case = !dispatcher->pred.empty(); plain_branch = !need_default_case && constructs.size() == 2; if (!plain_branch) { for (size_t i = 0; i < cutoff_index; i++) phi.incoming.push_back({ dispatcher->pred[i], builder.makeIntConstant(-1) }); phi.type_id = builder.makeIntType(32); } else { phi.type_id = builder.makeBoolType(); } } else { dispatcher = pool.create_node(); dispatcher->name = common_idom->name + ".collector"; dispatcher->immediate_dominator = common_idom; dispatcher->immediate_post_dominator = common_pdom; dispatcher->forward_post_visit_order = common_pdom->forward_post_visit_order; dispatcher->backward_post_visit_order = common_pdom->backward_post_visit_order; plain_branch = constructs.size() == 2; } phi.type_id = plain_branch ? 
builder.makeBoolType() : builder.makeIntType(32);

	for (size_t i = 0, n = constructs.size(); i < n; i++)
	{
		auto *candidate = constructs[i];

		if (allow_crossing_branches)
		{
			traverse_dominated_blocks_and_rewrite_branch(common_idom, candidate, dispatcher,
			                                             [](const CFGNode *) { return true; }, constructs);
		}
		else
		{
			traverse_dominated_blocks_and_rewrite_branch(common_idom, candidate, dispatcher);
		}

		size_t next_cutoff_index = dispatcher->pred.size();
		for (size_t j = cutoff_index; j < next_cutoff_index; j++)
		{
			spv::Id cond_id;
			if (plain_branch)
				cond_id = builder.makeBoolConstant(i != 0);
			else
				cond_id = builder.makeIntConstant(int32_t(i));
			phi.incoming.push_back({ dispatcher->pred[j], cond_id });
		}
		cutoff_index = next_cutoff_index;
	}

	if (freeze_control_flow)
	{
		common_idom->freeze_structured_analysis = true;
		common_idom->merge = MergeType::Loop;
		common_idom->loop_merge_block = dispatcher;
	}

	dispatcher->ir.terminator.conditional_id = phi.id;
	dispatcher->ir.phi.push_back(std::move(phi));
	builder.addName(phi.id, String("selector_" + common_pdom->name).c_str());
	dispatcher->ir.terminator.direct_block = nullptr;
	dispatcher->clear_branches();

	if (plain_branch)
	{
		dispatcher->ir.terminator.type = Terminator::Type::Condition;
		dispatcher->ir.terminator.false_block = constructs[0];
		dispatcher->ir.terminator.true_block = constructs[1];
		dispatcher->add_branch(constructs[0]);
		dispatcher->add_branch(constructs[1]);
	}
	else
	{
		dispatcher->ir.terminator.type = Terminator::Type::Switch;

		Terminator::Case default_case;
		default_case.node = need_default_case ? common_pdom : constructs[0];
		default_case.is_default = true;
		dispatcher->ir.terminator.cases.push_back(default_case);
		dispatcher->add_branch(default_case.node);

		for (size_t i = 0, n = constructs.size(); i < n; i++)
		{
			auto *candidate = constructs[i];
			assert(allow_crossing_branches || candidate->pred.empty() || candidate == default_case.node);
			dispatcher->add_branch(candidate);

			if (need_default_case || i)
			{
				Terminator::Case break_case;
				break_case.node = candidate;
				break_case.value = uint32_t(i);
				dispatcher->ir.terminator.cases.push_back(break_case);
			}
		}
	}
}

bool CFGStructurizer::rewrite_complex_loop_exits(CFGNode *node, CFGNode *merge, Vector<CFGNode *> &dominated_exits)
{
	if (!merge || !node->pred_back_edge->succ.empty() || dominated_exits.size() < 2)
		return false;

	// This heuristic is somewhat questionable. :')
	bool needs_early_explicit_ladder = false;
	CFGNode *common_idom = node;

	// Use a stricter definition when there is a clean merge candidate.
	if (node->can_loop_merge_to(merge))
	{
		// If all nodes share a frontier node which is not the target merge block, we have a spicy merge
		// that should be collected in a ladder first, since there is no natural ladder block
		// in this scenario. The shared frontier node is the more plausible true merge target,
		// and the outer merge was a red herring, but since we don't have a proper ladder block,
		// it will complicate things.
		Vector<CFGNode *> frontier_nodes;
		for (auto *n : dominated_exits)
		{
			frontier_nodes.insert(frontier_nodes.end(),
			                      n->dominance_frontier.begin(), n->dominance_frontier.end());
		}

		std::sort(frontier_nodes.begin(), frontier_nodes.end());

		const CFGNode *frontier_base = nullptr;
		uint32_t count = 0;
		for (auto *n : frontier_nodes)
		{
			if (n == frontier_base)
				count++;
			else
				count = 1;
			frontier_base = n;

			if (count == dominated_exits.size() && n != merge && query_reachability(*n, *merge))
			{
				needs_early_explicit_ladder = true;
				break;
			}
		}
	}
	else
	{
		// If we cannot do a clean merge anyway, then we should try to look for frontier nodes.
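		// Hypothetical shape this is aiming at: each dominated exit has exactly one
		// dominance frontier node that the loop header does not dominate,
		//
		//   exit_a -> frontier_a,  exit_b -> frontier_b
		//
		// in which case the analysis is retargeted at the frontiers, since unrelated
		// branches into those frontiers can be resolved by the same dispatch ladder.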
		auto frontier_nodes = dominated_exits;

		// Skip forward to the dominance frontier. This makes control flow easier to deal with
		// since unrelated branches to the frontiers can also be resolved.
		// This heuristic is admittedly somewhat arbitrary,
		// but it is meant to help on some specific real-world shaders.
		for (auto *&n : frontier_nodes)
		{
			if (n->dominance_frontier.size() == 1 && !node->dominates(n->dominance_frontier.front()))
			{
				n = n->dominance_frontier.front();
			}
			else
			{
				// We don't have a clean frontier, skip this check.
				frontier_nodes.clear();
				break;
			}
		}

		if (!frontier_nodes.empty())
		{
			// If the frontiers are all different, and it's not the merge block, something is afoot.
			// Don't sort by pointer since we care about codegen invariance.
			std::sort(frontier_nodes.begin(), frontier_nodes.end(), [](const CFGNode *a, const CFGNode *b) {
				return a->forward_post_visit_order > b->forward_post_visit_order;
			});

			bool has_dup_frontier = false;
			for (size_t i = 1, n = frontier_nodes.size(); i < n && !has_dup_frontier; i++)
				if (frontier_nodes[i] == frontier_nodes[i - 1] || frontier_nodes[i] == merge)
					has_dup_frontier = true;

			if (has_dup_frontier)
				frontier_nodes.clear();
		}

		if (!frontier_nodes.empty())
		{
			// Make sure that the frontier nodes we found fully dominate all preds of merge,
			// otherwise, the transpose of code will likely break.
			for (auto *pred : merge->pred)
			{
				bool has_dominating = false;
				for (auto *f : frontier_nodes)
				{
					if (f->dominates(pred))
					{
						has_dominating = true;
						break;
					}
				}

				if (!has_dominating)
				{
					frontier_nodes.clear();
					break;
				}
			}
		}

		if (!frontier_nodes.empty())
		{
			needs_early_explicit_ladder = true;

			// First collect the inner break blocks in a neat bow.
			node->pred_back_edge->fake_succ.clear();
			node->pred_back_edge->fake_pred.clear();
			collect_and_dispatch_control_flow(node, merge, dominated_exits, false, false);
			recompute_cfg();

			// Then collect the outer layer.
			dominated_exits = std::move(frontier_nodes);
			common_idom = merge->immediate_dominator;
		}
	}

	if (needs_early_explicit_ladder)
	{
		// Avoids false-positive assertions when trying to rewrite branches.
		// We're going to recompute the CFG after this anyway.
		node->pred_back_edge->fake_succ.clear();
		node->pred_back_edge->fake_pred.clear();
		collect_and_dispatch_control_flow(common_idom, merge, dominated_exits, false, false);
		return true;
	}

	return false;
}

bool CFGStructurizer::find_loops(unsigned pass)
{
	for (auto index = forward_post_visit_order.size(); index; index--)
	{
		// Visit in reverse order so we resolve outer loops first,
		// this lets us detect ladder-breaking loops.
		auto *node = forward_post_visit_order[index - 1];
		if (node->freeze_structured_analysis)
		{
			// If we have a pre-created dummy loop for ladder breaking,
			// just propagate the header information and be done with it.
			if (node->merge == MergeType::Loop)
			{
				node->loop_merge_block->headers.push_back(node);
				continue;
			}
		}

		if (!node->has_pred_back_edges())
			continue;

		// There are back-edges here, this must be a loop header.
		node->merge = MergeType::Loop;

		auto result = analyze_loop(node);
		auto &dominated_exit = result.dominated_exit;
		auto &inner_dominated_exit = result.inner_dominated_exit;
		auto &non_dominated_exit = result.non_dominated_exit;

		// This should not come up here, and must be handled in transpose loops.
		assert(result.dominated_continue_exit.empty());

		// Detect infinite loop with an exit which is only in inner loop construct.
		// It is impossible to construct a merge block in this case,
		// so just merge to unreachable.
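		// Illustrative scenario: a shader-style for(;;) whose continue block has no exit edge
		// and whose only exits are return-like. If every candidate exit allows it, the loop is
		// deliberately classified as infinite rather than given an invented, bogus merge block.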
		bool force_infinite_loop = false;

		// If we have a trivial case where there is only one possible loop exit which we dominate,
		// we shouldn't consider it an infinite loop, but a merge.
		bool trivial_exit_loop = dominated_exit.size() == 1 &&
		                         result.non_dominated_exit.empty() && result.inner_dominated_exit.empty() &&
		                         result.direct_exits.empty() && result.inner_direct_exits.empty();

		if (trivial_exit_loop)
		{
			auto *candidate = dominated_exit.front();

			// Resolve some false positives. It's possible that a loop exit can be detected as inner,
			// but it's just a good merge candidate for an inner infinite loop.
			bool loop_exit_dominates_continue =
			    candidate->immediate_dominator &&
			    candidate->immediate_dominator->dominates(node->pred_back_edge);

			// If we promoted an inner header, this is not a trivial exit.
			const CFGNode *innermost_loop_header = get_innermost_loop_header_for(node, dominated_exit.front());
			if (node != innermost_loop_header)
			{
				// There are at least two scenarios where we have to be careful:
				// - If the innermost header has an edge out of the continue block.
				//   If we still detect this exit as belonging to the inner loop, it must be that case.
				// - Also, only accept this as a trivial exit if the immediate dominator of the exit
				//   also dominates the continue block.
				if (!innermost_loop_header->pred_back_edge->succ.empty() || !loop_exit_dominates_continue)
					trivial_exit_loop = false;
			}
		}

		if (node->pred_back_edge->succ.empty() && !trivial_exit_loop)
		{
			force_infinite_loop = true;
			for (auto *e : result.dominated_exit)
				force_infinite_loop = force_infinite_loop && loop_exit_supports_infinite_loop(node, e);
			for (auto *e : result.non_dominated_exit)
				force_infinite_loop = force_infinite_loop && loop_exit_supports_infinite_loop(node, e);
			for (auto *e : result.inner_dominated_exit)
				force_infinite_loop = force_infinite_loop && loop_exit_supports_infinite_loop(node, e);
			for (auto *e : result.direct_exits)
				force_infinite_loop = force_infinite_loop && loop_exit_supports_infinite_loop(node, e);
			for (auto *e : result.inner_direct_exits)
				force_infinite_loop = force_infinite_loop && loop_exit_supports_infinite_loop(node, e);
		}

		if (force_infinite_loop ||
		    (dominated_exit.empty() && inner_dominated_exit.empty() && non_dominated_exit.empty()))
		{
			// There can be zero loop exits, i.e. infinite loop. This means we have no merge block.
			// We will invent a merge block to satisfy SPIR-V validator, and declare it as unreachable.
			node->loop_merge_block = nullptr;
			//LOGI("Loop without merge: %p (%s)\n", static_cast<const void *>(node), node->name.c_str());
		}
		else if (dominated_exit.size() == 1 && non_dominated_exit.empty() && inner_dominated_exit.empty())
		{
			CFGNode *direct_exit_pdom = nullptr;
			if (!result.direct_exits.empty())
				direct_exit_pdom = find_common_post_dominator(result.direct_exits);

			if (direct_exit_pdom && query_reachability(*dominated_exit.front(), *direct_exit_pdom))
			{
				node->loop_ladder_block = dominated_exit.front();
				node->loop_merge_block = direct_exit_pdom;
			}
			else
			{
				// Clean merge.
				// This is a unique merge block. There can be no other merge candidate.
				node->loop_merge_block = dominated_exit.front();
			}

			const_cast<CFGNode *>(node->loop_merge_block)->add_unique_header(node);
			//LOGI("Loop with simple merge: %p (%s) -> %p (%s)\n", static_cast<const void *>(node), node->name.c_str(),
			//     static_cast<const void *>(node->loop_merge_block), node->loop_merge_block->name.c_str());
		}
		else if (dominated_exit.empty() && inner_dominated_exit.empty() && non_dominated_exit.size() == 1)
		{
			// Single-escape merge.
			// It is unique, but we need workarounds later.
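		// Sketch of the rewrite performed below (names illustrative):
		//
		//   before: header -> ... -> escape_exit (not dominated by header)
		//   after:  header -> ... -> escape_exit.merge (ladder) -> escape_exit
		//
		// The ladder block is dominated by the header, so the loop can merge cleanly to it
		// before control breaks out to the non-dominated exit.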
auto *merge_block = non_dominated_exit.front(); // We can make the non-dominated exit dominated by // adding a ladder block in-between. This allows us to merge the loop cleanly // before breaking out. auto *ladder = create_ladder_block(node, merge_block, ".merge"); node->loop_ladder_block = nullptr; node->loop_merge_block = ladder; const_cast(node->loop_merge_block)->add_unique_header(node); //LOGI("Loop with ladder merge: %p (%s) -> %p (%s)\n", static_cast(node), node->name.c_str(), // static_cast(node->loop_merge_block), node->loop_merge_block->name.c_str()); } else { auto merge_result = analyze_loop_merge(node, result); auto *merge = merge_result.merge; auto *dominated_merge = merge_result.dominated_merge; if (pass == 0 && rewrite_complex_loop_exits(node, merge, dominated_exit)) return true; if (!merge) { // Most likely this means we have an early return somewhere. Try the weak merge candidate. merge = merge_result.weak_merge; } if (!merge) { LOGW("Failed to find a common merge point ...\n"); } else if (node->can_loop_merge_to(merge)) { // Clean merge. // This is a unique merge block. There can be no other merge candidate. //LOGI("Loop with simple multi-exit merge: %p (%s) -> %p (%s)\n", static_cast(node), // node->name.c_str(), static_cast(node->loop_merge_block), // node->loop_merge_block->name.c_str()); node->loop_merge_block = merge; const_cast(node->loop_merge_block)->add_unique_header(node); } else { if (!dominated_merge && node->pred_back_edge->succ.size() == 1) { // If continue block exits, and it still does not dominate, we should invent a ladder block // so we get one, otherwise splitting merge scopes will break. dominated_merge = create_ladder_block(node->pred_back_edge, node->pred_back_edge->succ.front(), ".merge"); } // Single-escape merge. // It is unique, but we need workarounds later. //LOGI("Loop with ladder multi-exit merge: %p (%s) -> %p (%s)\n", static_cast(node), // node->name.c_str(), static_cast(node->loop_merge_block), // node->loop_merge_block->name.c_str()); //if (dominated_merge) //{ //LOGI(" Ladder block: %p (%s)\n", static_cast(dominated_merge), // dominated_merge->name.c_str()); //} // We will use this block as a ladder. node->loop_ladder_block = dominated_merge; node->loop_merge_block = merge; const_cast(node->loop_merge_block)->add_unique_header(node); } } } return false; } CFGNode *CFGStructurizer::get_target_break_block_for_inner_header(const CFGNode *node, size_t header_index) { CFGNode *inner_header = node->headers[header_index]; CFGNode *target_header = nullptr; for (size_t j = header_index; j && !target_header; j--) { auto *candidate_header = node->headers[j - 1]; if (candidate_header->merge == MergeType::Loop) { // We might have two loops, each at equal scopes. // In order to break out to an outer loop, we must verify that the loops actually nest. // We must not introduce any backwards branches here. CFGNode *candidate_merge = nullptr; if (candidate_header->loop_ladder_block) candidate_merge = candidate_header->loop_ladder_block; else if (candidate_header->loop_merge_block) candidate_merge = candidate_header->loop_merge_block; if (!candidate_merge) continue; // Check for backwards branch. if (query_reachability(*candidate_merge, *inner_header)) continue; // An outer header is expected to dominate the inner header. Otherwise, they live in // separate scopes, and we should look for a header that is further out. 
if (!candidate_header->dominates(inner_header)) continue; target_header = candidate_header; } } return target_header; } CFGNode *CFGStructurizer::create_ladder_block(CFGNode *header, CFGNode *node, const char *tag) { auto *ladder = pool.create_node(); ladder->name = node->name + tag; ladder->add_branch(node); ladder->ir.terminator.type = Terminator::Type::Branch; ladder->ir.terminator.direct_block = node; ladder->immediate_post_dominator = node; ladder->forward_post_visit_order = node->forward_post_visit_order; ladder->backward_post_visit_order = node->backward_post_visit_order; ladder->dominance_frontier.push_back(node); traverse_dominated_blocks_and_rewrite_branch(header, node, ladder); ladder->recompute_immediate_dominator(); return ladder; } CFGNode *CFGStructurizer::get_or_create_ladder_block(CFGNode *node, size_t header_index) { auto *header = node->headers[header_index]; auto *loop_ladder = header->loop_ladder_block; if (!loop_ladder) { // We don't have a ladder, because the loop merged to an outer scope, so we need to fake a ladder. // If we hit this case, we did not hit the simpler case in find_loops(). auto *ladder = create_ladder_block(header, node, ".merge"); header->loop_ladder_block = ladder; // If this is the second outermost scope, we don't need to deal with ladders. // ladder is a dummy branch straight out to the outer merge point. if (header_index > 1) loop_ladder = header->loop_ladder_block; } return loop_ladder; } CFGNode *CFGStructurizer::build_enclosing_break_target_for_loop_ladder(CFGNode *&node, CFGNode *loop_ladder) { // A loop ladder needs to break out somewhere. If we don't have a candidate // place to break out to, we will need to create one for the outer scope. // This is the purpose of the full_break_target fallback. bool ladder_to_merge_is_trivial = loop_ladder->succ.size() == 1 && loop_ladder->succ.front() == node; if (ladder_to_merge_is_trivial) { auto *succ = loop_ladder->succ.front(); // Chase through dummy ladders until we find something tangible that is actually PHI sensitive. while (succ->ir.phi.empty() && succ->succ.size() == 1) succ = succ->succ.front(); IncomingValue *incoming_from_ladder = nullptr; if (!succ->ir.phi.empty()) { // All PHIs are fundamentally the same w.r.t. input blocks. auto &phi = succ->ir.phi.front(); incoming_from_ladder = phi_incoming_blocks_find_block(phi.incoming, loop_ladder); } CFGNode *retarget_idom = nullptr; if (incoming_from_ladder != nullptr) { // If succ takes this ladder as a PHI input, we have to be careful. // We can only treat this merge as trivial if we can trivially hoist the input to the idom. // Hoisting to idom only works if that idom is not already a PHI input for succ, // and that idom dominates the input value. retarget_idom = loop_ladder->immediate_dominator; bool can_hoist_incoming_value = retarget_idom && retarget_idom != loop_ladder && !phi_incoming_blocks_find_block(succ->ir.phi.front().incoming, retarget_idom); if (!can_hoist_incoming_value) retarget_idom = nullptr; } if (retarget_idom) { bool is_generated = false; // We have no opcodes in loop ladder, but theoretically, // we can have some PHI values that are being depended on. 
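			// Concretely: if succ has a PHI input { block = loop_ladder, id = X } and X is not
			// generated by loop_ladder itself, then X must already be visible in retarget_idom
			// (which dominates loop_ladder), so the incoming block can simply be retargeted.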
for (auto &override_phi : succ->ir.phi) { auto *incoming = phi_incoming_blocks_find_block(override_phi.incoming, loop_ladder); if (!incoming) continue; if (id_is_generated_by_block(loop_ladder, incoming->id)) { is_generated = true; break; } } if (!is_generated) { // If we don't generate the ID ourselves and idom dominates this block we can prove // that idom is a valid incoming value. for (auto &override_phi : succ->ir.phi) retarget_phi_incoming_block(override_phi, loop_ladder, retarget_idom); } else { // It's not a trivial merge after all :( ladder_to_merge_is_trivial = false; } } } CFGNode *full_break_target = nullptr; // We have to break somewhere, turn the outer selection construct into // a loop. if (!ladder_to_merge_is_trivial) { // Selection merge to this dummy instead. auto *new_selection_merge = create_helper_pred_block(node); // This is now our fallback loop break target. full_break_target = node; auto *loop = create_helper_pred_block(new_selection_merge->headers[0]); // Reassign header node. assert(new_selection_merge->headers[0]->merge == MergeType::Selection); new_selection_merge->headers[0]->selection_merge_block = new_selection_merge; new_selection_merge->headers[0] = loop; loop->merge = MergeType::Loop; loop->loop_merge_block = node; loop->freeze_structured_analysis = true; // After the loop ladder, make sure we always branch to the break target. traverse_dominated_blocks_and_rewrite_branch(loop_ladder, new_selection_merge, node); node = new_selection_merge; } return full_break_target; } CFGNode *CFGStructurizer::build_ladder_block_for_escaping_edge_handling(CFGNode *node, CFGNode *header, CFGNode *loop_ladder, CFGNode *target_header, CFGNode *full_break_target, const UnorderedSet &normal_preds) { CFGNode *new_ladder_block = nullptr; if (target_header || full_break_target) { // If we have a ladder block, there exists a merge candidate which the loop header dominates. // We create a ladder block before the merge block, which becomes the true merge block. // In this ladder block, we can detect with Phi nodes whether the break was "clean", // or if we had an escape edge. // If we have an escape edge, we can break to outer level, and continue the ladder that way. // Otherwise we branch to the existing merge block and continue as normal. // We'll also need to rewrite a lot of Phi nodes this way as well. auto *ladder = create_helper_pred_block(loop_ladder); new_ladder_block = ladder; unsigned header_index; for (header_index = 0; header_index < uint32_t(node->headers.size()); header_index++) if (node->headers[header_index] == header) break; assert(header_index != node->headers.size()); // Merge to ladder instead. // If we're fixing up ladders for header index 0 it means we've already rewritten everything, // only apply the last fixup branch. if (header_index != 0 || block_is_plain_continue(node)) { traverse_dominated_blocks_and_rewrite_branch( header, node, ladder, [node, header_index](const CFGNode *next) { for (unsigned i = 0; i < header_index; i++) { auto *target = node->headers[i]; // Do not introduce cycles. Outer scopes must never be rewritten to branch to inner scopes. if (target && target->loop_ladder_block == next) return false; } return true; }, {}); } CFGNode *true_block = nullptr; // Ladder breaks out to outer scope. 
if (target_header && target_header->loop_ladder_block) true_block = target_header->loop_ladder_block; else if (target_header && target_header->loop_merge_block) true_block = target_header->loop_merge_block; else if (full_break_target) true_block = full_break_target; else LOGW("No loop merge block?\n"); if (true_block) { rewrite_ladder_conditional_branch_from_incoming_blocks( ladder, true_block, loop_ladder, [&](const CFGNode *n) { return normal_preds.count(n) == 0; }, String("ladder_phi_") + loop_ladder->name); // This can happen in some scenarios, fixup the branch to be a direct one instead. if (ladder->ir.terminator.true_block == ladder->ir.terminator.false_block) { ladder->ir.terminator.direct_block = ladder->ir.terminator.true_block; ladder->ir.terminator.type = Terminator::Type::Branch; } } } else { // Here, loop_ladder -> final merge is a trivial, direct branch. if (loop_ladder->ir.operations.empty()) { // Simplest common case. // If the loop ladder just branches to outer scope, and this block does not perform // any operations we can avoid messing around with ladder PHI variables and just execute the branch. // This block will likely become a frontier node when merging PHI instead. // This is a common case when breaking out of a simple for loop. traverse_dominated_blocks_and_rewrite_branch(header, node, loop_ladder); } else { // We have a case where we're trivially breaking out of a selection construct, // but the loop ladder block contains operations which we must not execute, // since we were supposed to branch directly out to node. // We cannot directly break out of a selection construct, so our ladder must be a bit more sophisticated. // ladder-pre -> merge -> ladder-post -> selection merge // \-------------------/ auto *ladder_pre = create_helper_pred_block(loop_ladder); auto *ladder_post = create_helper_succ_block(loop_ladder); // Merge to ladder instead. traverse_dominated_blocks_and_rewrite_branch(header, node, ladder_pre); rewrite_ladder_conditional_branch_from_incoming_blocks( ladder_pre, ladder_post, loop_ladder, [&](const CFGNode *n) { return normal_preds.count(n) == 0; }, String("ladder_phi_") + loop_ladder->name); new_ladder_block = ladder_pre; } } return new_ladder_block; } void CFGStructurizer::eliminate_degenerate_switch_merges() { for (auto *node : forward_post_visit_order) { if (node->headers.size() <= 1) continue; // In the second pass, it's illegal to have more than two target headers, so we have to turn some // headers into unreachable. The outermost scope wins. std::sort(node->headers.begin(), node->headers.end(), [](const CFGNode *a, const CFGNode *b) -> bool { if (a->dominates(b)) return true; else if (b->dominates(a)) return false; else return a->forward_post_visit_order > b->forward_post_visit_order; }); // Can only elide if we have a true loop merge to this node. if (node->headers[0]->merge != MergeType::Loop || node->headers[0]->loop_merge_block != node) continue; for (size_t i = 1, n = node->headers.size(); i < n; i++) { auto *header = node->headers[i]; // This cannot possibly work with loops. // We can generally turn selections into unreachable merges without trouble however ... 
if (header->merge == MergeType::Selection && header->selection_merge_block == node) header->selection_merge_block = nullptr; } } } bool CFGStructurizer::is_rewind_candidate_split_node( const Vector &visited_orphans, CFGNode *node, CFGNode *candidate) const { if (node->forward_post_visit_order != candidate->forward_post_visit_order) return false; if (is_trivially_no_split_node(candidate)) return false; if (std::find(visited_orphans.begin(), visited_orphans.end(), candidate) != visited_orphans.end()) return false; // For whatever reason, the node is no longer a meaningful merge target. for (auto *header : candidate->headers) if (!candidate->can_backtrace_to(header)) return false; // If we created a new helper pred block during traversal, it might not // exist in forward_post_visit_order. // Look for the replacement block here to make sure it gets processed in the appropriate order. // The replacement can happen in-line in this function, // so there is no chance to re-traverse the CFG. // Only consider blocks that we trivially post-dominate and that // definitely have no entry in forward_post_visit_order already. if (candidate->succ.size() != 1 || candidate->succ.front() != node) { // This is a ladder block of some sort. It's possible we're already in a "resolved" state, // so we really should not try to split further. // If we're considered a proper ladder block by any of our headers, bail. for (auto *header : candidate->headers) if (header->loop_ladder_block == node || header->loop_merge_block == node) return false; } return true; } bool CFGStructurizer::is_trivially_no_split_node(CFGNode *node) const { if (node->headers.size() <= 1 && !block_is_plain_continue(node)) return true; // It's possible that we have just one header. // One loop has a ladder block which is not this block, but the post-dominator is a pure continue block. // This gets rather awkward, since we need to special case this scenario. if (node->headers.empty()) return true; return false; } bool CFGStructurizer::split_merge_blocks(CFGNode *node) { if (is_trivially_no_split_node(node)) return false; // If this block was the merge target for more than one construct, // we will need to split the block. In SPIR-V, a merge block can only be the merge target for one construct. // However, we can set up a chain of merges where inner scope breaks to outer scope with a dummy basic block. // The outer scope comes before the inner scope merge. // We cannot fully trust a sort on post-visit order, since if we have two split blocks here, // they will have the same post-visit order until we recompute them. // FIXME: Should probably be smarter about this ... std::sort(node->headers.begin(), node->headers.end(), [](const CFGNode *a, const CFGNode *b) -> bool { if (a->dominates(b)) return true; else if (b->dominates(a)) return false; else return a->forward_post_visit_order > b->forward_post_visit_order; }); //LOGI("Splitting merge blocks for %s\n", node->name.c_str()); //for (auto *header : node->headers) // LOGI(" Header: %s.\n", header->name.c_str()); CFGNode *full_break_target = nullptr; // If we're a plain continue block, we're implicitly the full break target. bool plain_continue_resolve = block_is_plain_continue(node); if (plain_continue_resolve) full_break_target = node; // Before we start splitting and rewriting branches, we need to know which preds are considered "normal", // and which branches are considered ladder breaking branches (rewritten branches). 
	// This will influence whether a pred block gets false or true when emitting ladder breaking blocks later.
	Vector<UnorderedSet<const CFGNode *>> normal_preds(node->headers.size());
	for (size_t i = 0; i < node->headers.size(); i++)
		if (node->headers[i]->loop_ladder_block)
			for (auto *pred : node->headers[i]->loop_ladder_block->pred)
				normal_preds[i].insert(pred);

	bool has_rewrites_to_outer_ladder = false;

	// Start from innermost scope, and rewrite all escape branches to a merge block which is dominated by
	// the loop header in question. The merge block for the loop must have a ladder block before the old merge block.
	// This ladder block will break to outer scope, or keep executing the old merge block.
	for (size_t i = node->headers.size() - 1; i || plain_continue_resolve; i--)
	{
		auto *current_node = node->headers[i];

		// Find innermost loop header scope we can break to when resolving ladders.
		CFGNode *target_header = i != 0 ? get_target_break_block_for_inner_header(node, i) : nullptr;

		//LOGI("Current: %s, target: %s.\n", current_node->name.c_str(), target_header->name.c_str());

		if (current_node->merge == MergeType::Loop)
		{
			auto *loop_ladder = get_or_create_ladder_block(node, i);

			// The loop ladder needs to break to somewhere.
			// Either this is an outer loop scope, or we need to create a fake loop we can break out of if
			// the break is non-trivial.
			if (loop_ladder && !target_header && !full_break_target)
				full_break_target = build_enclosing_break_target_for_loop_ladder(node, loop_ladder);

			CFGNode *new_ladder_block = nullptr;
			if (loop_ladder)
			{
				new_ladder_block = build_ladder_block_for_escaping_edge_handling(
				    node, current_node, loop_ladder, target_header, full_break_target, normal_preds[i]);
				if (target_header == node->headers[0])
					has_rewrites_to_outer_ladder = true;
			}

			// We won't analyze this again, so make sure header knows
			// about the new merge block.
			if (current_node->freeze_structured_analysis)
			{
				if (new_ladder_block)
					current_node->loop_ladder_block = new_ladder_block;
				current_node->loop_merge_block = current_node->loop_ladder_block;
				current_node->loop_ladder_block = nullptr;
			}
		}
		else if (current_node->merge == MergeType::Selection)
		{
			if (target_header)
			{
				// Breaks out to outer available scope.
				CFGNode *rewrite_to = nullptr;
				if (target_header->loop_ladder_block)
					rewrite_to = target_header->loop_ladder_block;
				else if (target_header->loop_merge_block)
					rewrite_to = target_header->loop_merge_block;

				if (rewrite_to)
				{
					traverse_dominated_blocks_and_rewrite_branch(current_node, node, rewrite_to);
					if (target_header == node->headers[0])
						has_rewrites_to_outer_ladder = true;
				}
				else
					LOGW("No loop merge block?\n");
			}
			else if (full_break_target)
			{
				traverse_dominated_blocks_and_rewrite_branch(current_node, node, full_break_target);
			}
			else
			{
				// The outer scope *must* now become a loop, no matter what.
				// We cannot rely on a traversal to rewrite breaking constructs in the entire loop,
				// so "everything" must essentially become a break instead.
full_break_target = node; assert(node->headers[0]->merge == MergeType::Selection); node->headers[0]->merge = MergeType::Loop; node->headers[0]->freeze_structured_analysis = true; assert(node->headers[0]->selection_merge_block == node); node->headers[0]->loop_merge_block = node->headers[0]->selection_merge_block; node->headers[0]->selection_merge_block = nullptr; } } else LOGE("Invalid merge type.\n"); if (i == 0) break; } auto *outer_header = node->headers[0]; if (has_rewrites_to_outer_ladder && outer_header->merge == MergeType::Loop && outer_header->loop_ladder_block && outer_header->loop_merge_block && outer_header->loop_ladder_block->dominates(outer_header->loop_merge_block)) { auto *ladder = outer_header->loop_ladder_block; bool non_trivial_ladder = !ladder->ir.operations.empty() || ladder_chain_has_phi_dependencies(ladder, outer_header->loop_merge_block); if (non_trivial_ladder) { // It's possible we have branches that intended to rewrite to loop_merge_block // but ended up writing to loop_ladder_block instead. // Perform a final fixup branch if this is necessary. // If the ladder block is a dummy, we can ignore this. build_ladder_block_for_escaping_edge_handling(node, outer_header, outer_header->loop_ladder_block, nullptr, outer_header->loop_merge_block, normal_preds[0]); } } return true; } void CFGStructurizer::split_merge_blocks_and_visit_orphan_preds( Vector &visited_orphans, CFGNode *merge, CFGNode *node) { if (split_merge_blocks(node)) return; for (auto *pred : node->pred) { if (is_rewind_candidate_split_node(visited_orphans, merge, pred)) { visited_orphans.push_back(pred); split_merge_blocks_and_visit_orphan_preds(visited_orphans, merge, pred); } } } void CFGStructurizer::split_merge_blocks() { Vector visited_orphans; for (auto *node : forward_post_visit_order) split_merge_blocks_and_visit_orphan_preds(visited_orphans, node, node); } bool CFGStructurizer::structurize(unsigned pass) { auto switch_mode = process_switch_blocks(pass); while (switch_mode == SwitchProgressMode::IterativeModify) { // For complex rewrites, we damage the CFG, so need to start over every iteration. recompute_cfg(); switch_mode = process_switch_blocks(pass); } // After a trivial modify, we must be able to complete the process in one iteration. if (switch_mode == SwitchProgressMode::SimpleModify) { recompute_cfg(); if (process_switch_blocks(pass) != SwitchProgressMode::Done) { LOGE("Fatal, detected infinite loop.\n"); abort(); } } if (find_loops(pass)) return true; find_selection_merges(pass); fixup_broken_selection_merges(pass); if (pass == 0) split_merge_blocks(); else eliminate_degenerate_switch_merges(); return false; } bool CFGStructurizer::exists_path_in_cfg_without_intermediate_node(const CFGNode *start_block, const CFGNode *end_block, const CFGNode *stop_block) const { // If we're resolving PHI for a frontier inside a loop, consider the back-edge as the end target for analysis. // If we start outside the loop, don't move the end block. if (end_block->pred_back_edge && !query_reachability(*stop_block, *end_block) && !query_reachability(*start_block, *end_block)) { end_block = end_block->pred_back_edge; } if (query_reachability(*start_block, *end_block) && query_reachability(*start_block, *stop_block) && query_reachability(*stop_block, *end_block)) { auto *frontier = get_post_dominance_frontier_with_cfg_subset_that_reaches(stop_block, end_block, start_block); // We already know start_block reaches the frontier. 
		return frontier != nullptr;
	}
	else
	{
		bool ret = query_reachability_through_back_edges(*start_block, *end_block);
		return ret;
	}
}

CFGNode *CFGStructurizer::get_post_dominance_frontier_with_cfg_subset_that_reaches(const CFGNode *node,
                                                                                   const CFGNode *must_reach,
                                                                                   const CFGNode *must_reach_frontier) const
{
	UnorderedSet<const CFGNode *> promoted_post_dominators;
	promoted_post_dominators.insert(node);

	auto frontiers = node->post_dominance_frontier;
	assert(query_reachability(*node, *must_reach));

	if (frontiers.empty())
		return nullptr;

	while (!frontiers.empty())
	{
		// We might not be interested in post-domination-frontiers that we cannot reach.
		// Filter our search based on this.
		if (must_reach_frontier)
		{
			auto itr = std::remove_if(frontiers.begin(), frontiers.end(), [&](CFGNode *candidate) {
				return !query_reachability(*must_reach_frontier, *candidate);
			});
			frontiers.erase(itr, frontiers.end());
		}

		if (frontiers.size() > 1)
		{
			std::sort(frontiers.begin(), frontiers.end(), [](const CFGNode *a, const CFGNode *b) {
				return a->backward_post_visit_order < b->backward_post_visit_order;
			});
			frontiers.erase(std::unique(frontiers.begin(), frontiers.end()), frontiers.end());
		}
		else if (frontiers.empty())
			break;

		auto *frontier = frontiers.back();

		// For a frontier to be discounted, we look at all its successors and check that each one
		// either cannot reach must_reach, or is post-dominated by a node in promoted_post_dominators.
		// If a post-dominance frontier satisfies this rule, it is promoted to be considered an alias of node.
		bool all_succs_must_go_via_node = true;
		for (auto *succ : frontier->succ)
		{
			bool promote = true;
			if (query_reachability(*succ, *must_reach))
			{
				promote = false;
				for (auto *pdom : promoted_post_dominators)
				{
					if (pdom->post_dominates(succ))
					{
						promote = true;
						break;
					}
				}
			}

			if (!promote)
			{
				all_succs_must_go_via_node = false;
				break;
			}
		}

		if (!all_succs_must_go_via_node)
		{
			return frontier;
		}
		else
		{
			promoted_post_dominators.insert(frontier);
			frontiers.pop_back();
			for (auto *pdoms : frontier->post_dominance_frontier)
				frontiers.push_back(pdoms);
		}
	}

	return frontiers.empty() ?
nullptr : frontiers.front(); } void CFGStructurizer::recompute_post_dominance_frontier(CFGNode *node) { for (auto *pred : node->pred) { if (pred->immediate_post_dominator != node && std::find(node->post_dominance_frontier.begin(), node->post_dominance_frontier.end(), pred) == node->post_dominance_frontier.end()) { node->post_dominance_frontier.push_back(pred); } if (auto *ipdom = node->immediate_post_dominator) { for (auto *frontier_node : node->post_dominance_frontier) { if (!ipdom->post_dominates(frontier_node) && std::find(ipdom->post_dominance_frontier.begin(), ipdom->post_dominance_frontier.end(), frontier_node) == ipdom->post_dominance_frontier.end()) { ipdom->post_dominance_frontier.push_back(frontier_node); } } } } } void CFGStructurizer::recompute_dominance_frontier(CFGNode *node) { for (auto *succ : node->succ) { if (succ->immediate_dominator != node && std::find(node->dominance_frontier.begin(), node->dominance_frontier.end(), succ) == node->dominance_frontier.end()) { node->dominance_frontier.push_back(succ); } if (auto *idom = node->immediate_dominator) { for (auto *frontier_node : node->dominance_frontier) { if (!idom->dominates(frontier_node) && std::find(idom->dominance_frontier.begin(), idom->dominance_frontier.end(), frontier_node) == idom->dominance_frontier.end()) { idom->dominance_frontier.push_back(frontier_node); } } } } } bool CFGStructurizer::rewrite_invalid_loop_breaks() { // Keep iterating here until we have validated a clean CFG w.r.t. block-like loops. // This should pass through first time without issue with extremely high probability, // so hitting the slow path isn't a real concern until proven otherwise. CFGNode *rewrite_header = nullptr; CFGNode *invalid_target = nullptr; CFGNode *invalid_merge = nullptr; // Process from inside out. for (auto *node : forward_post_visit_order) { // Structured loop constructs can end up with problematic merge scenarios where we missed // some cases where blocks branch outside our construct. // At some point, we were considered mere selection constructs and breaking out of it is fine, // but if the selection is promoted to a loop at some point after this analysis, we are a bit screwed. // This can happen in complex ladder resolve scenarios. // The fix-up means introducing multiple levels of ladder blocks. if (node->merge == MergeType::Loop && node->freeze_structured_analysis) { auto *merge = node->loop_merge_block; if (!merge || merge->post_dominates(node)) continue; node->traverse_dominated_blocks([&](CFGNode *candidate) { if (candidate == merge || invalid_target) return false; // If the succ can reach outside the loop construct, we have an error condition. for (auto *succ : candidate->succ) { bool can_reach_merge = query_reachability(*succ, *merge); auto *candidate_continue = scan_plain_continue_block(succ); // Need to be a bit more careful about continue blocks in infinite loops. // Include loop exits as well in the reachability analysis. if (!can_reach_merge && candidate_continue->succ_back_edge) { for (auto *fake_succ : candidate_continue->fake_succ) { if (query_reachability(*fake_succ, *merge)) { can_reach_merge = true; break; } } } if (!can_reach_merge) { // Determine if we're an inner terminate/return, or a loop exit. // If the common post-dominator is EXIT node, this is a return-like relationship, // and we skip any fixup. 
auto *pdom = CFGNode::find_common_post_dominator(succ, merge); if (pdom != nullptr && !pdom->pred.empty()) invalid_target = succ; } } return true; }); if (invalid_target) { rewrite_header = node; break; } } else if (node->merge == MergeType::Loop && node->loop_merge_block && node->pred_back_edge && node->pred_back_edge->succ.empty()) { // Only consider "infinite" loops here. Otherwise, the break from continue will always be // a suitable merge target and the ladder block for any loop exits. if (!node->dominates(node->loop_merge_block)) { // We must dominate the loop merge block here. // There is a risk that with breaks happening into multiple scopes in certain cases, // we won't be able to guarantee this in the two-phase structurizer. invalid_merge = node; break; } } } if (invalid_merge) { auto result = analyze_loop(invalid_merge); result.dominated_exit.insert(result.dominated_exit.end(), result.non_dominated_exit.begin(), result.non_dominated_exit.end()); collect_and_dispatch_control_flow(invalid_merge, invalid_merge->loop_merge_block, result.dominated_exit, false, false); recompute_cfg(); return true; } if (invalid_target) { auto *merge = rewrite_header->loop_merge_block; auto *dispatcher = create_helper_pred_block(merge); rewrite_header->loop_merge_block = dispatcher; size_t natural_preds = dispatcher->pred.size(); traverse_dominated_blocks_and_rewrite_branch(rewrite_header, invalid_target, dispatcher); PHI phi; phi.id = module.allocate_id(); phi.type_id = module.get_builder().makeBoolType(); module.get_builder().addName(phi.id, (String("break_selector_") + merge->name).c_str()); for (size_t i = 0; i < natural_preds; i++) { IncomingValue incoming = {}; incoming.block = dispatcher->pred[i]; incoming.id = module.get_builder().makeBoolConstant(true); phi.incoming.push_back(incoming); } for (size_t i = natural_preds, n = dispatcher->pred.size(); i < n; i++) { IncomingValue incoming = {}; incoming.block = dispatcher->pred[i]; incoming.id = module.get_builder().makeBoolConstant(false); phi.incoming.push_back(incoming); } dispatcher->ir.terminator.type = Terminator::Type::Condition; dispatcher->ir.terminator.true_block = merge; dispatcher->ir.terminator.false_block = invalid_target; dispatcher->ir.terminator.direct_block = nullptr; dispatcher->ir.terminator.conditional_id = phi.id; dispatcher->ir.phi.push_back(std::move(phi)); dispatcher->add_branch(invalid_target); recompute_cfg(); return true; } return false; } void CFGStructurizer::traverse(BlockEmissionInterface &iface) { // Make sure all blocks are known to the backend before we emit code. // Prefer that IDs grow the further down the function we go. for (auto itr = forward_post_visit_order.rbegin(); itr != forward_post_visit_order.rend(); ++itr) { (*itr)->id = 0; iface.register_block(*itr); } // Need to emit blocks such that dominating blocks come before dominated blocks. 
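	// Walking forward_post_visit_order from the back is a reverse post-order traversal,
	// which guarantees that a block's immediate dominator is emitted before the block itself.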
	for (auto index = forward_post_visit_order.size(); index; index--)
	{
		auto *block = forward_post_visit_order[index - 1];
		auto &merge = block->ir.merge_info;

		switch (block->merge)
		{
		case MergeType::Selection:
			merge.merge_block = block->selection_merge_block;
			if (merge.merge_block)
				iface.register_block(merge.merge_block);
			merge.merge_type = block->merge;
			iface.emit_basic_block(block);
			break;

		case MergeType::Loop:
			merge.merge_block = block->loop_merge_block;
			merge.merge_type = block->merge;
			merge.continue_block = block->pred_back_edge;
			if (merge.merge_block)
				iface.register_block(merge.merge_block);
			if (merge.continue_block)
				iface.register_block(merge.continue_block);
			iface.emit_basic_block(block);
			break;

		default:
			iface.emit_basic_block(block);
			break;
		}
	}
}

template <typename Op>
void CFGStructurizer::traverse_dominated_blocks_and_rewrite_branch(const CFGNode *dominator, CFGNode *candidate,
                                                                   CFGNode *from, CFGNode *to, const Op &op,
                                                                   const Vector<CFGNode *> &barrier,
                                                                   UnorderedSet<CFGNode *> &visitation_cache)
{
	visitation_cache.insert(candidate);

	for (auto *node : candidate->succ)
	{
		if (!op(node))
			continue;

		if (node == from)
		{
			// Don't introduce a cycle.
			// We only retarget branches when we have "escape-like" edges.
			bool introduces_cycle;
			if ((to->forward_post_visit_order == candidate->forward_post_visit_order && to != candidate) ||
			    (from->forward_post_visit_order == candidate->forward_post_visit_order && from != candidate))
			{
				// Can happen when resolving ladders. We cannot use reachability query, do it the slow way.
				introduces_cycle = candidate->can_backtrace_to(to);
			}
			else
			{
				introduces_cycle = query_reachability(*to, *candidate);
			}

			if (!introduces_cycle)
			{
				// If we already have a branch to "to", need to branch there via an intermediate node.
				// This way, we can distinguish between a normal branch and a rewritten branch.
				candidate->retarget_branch_with_intermediate_node(from, to);
			}
		}
		else if (dominator->dominates(node) && node != to &&
		         std::find(barrier.begin(), barrier.end(), node) == barrier.end())
		{
			// Do not traverse beyond the new branch target.
			if (!visitation_cache.count(node))
				traverse_dominated_blocks_and_rewrite_branch(dominator, node, from, to, op, barrier, visitation_cache);
		}
	}

	// In case we are rewriting branches to a new merge block, we might
	// change the immediate post dominator for continue blocks inside this loop construct.
	// When analysing post dominance in these cases, we need to make sure that we merge to the new merge block,
	// and not the old one. This avoids some redundant awkward loop constructs.
	for (auto &fake_next : candidate->fake_succ)
	{
		if (fake_next == from)
		{
			candidate->retarget_fake_succ(from, to);
			break;
		}
	}
}

template <typename Op>
void CFGStructurizer::traverse_dominated_blocks_and_rewrite_branch(CFGNode *dominator, CFGNode *from, CFGNode *to,
                                                                   const Op &op, const Vector<CFGNode *> &barrier)
{
	if (from == to)
		return;

	UnorderedSet<CFGNode *> visitation_cache;
	traverse_dominated_blocks_and_rewrite_branch(dominator, dominator, from, to, op, barrier, visitation_cache);
	dominator->fixup_merge_info_after_branch_rewrite(from, to);

	// Force all post-domination information to be recomputed.
	Vector<CFGNode *> linear_visitation_cache;
	linear_visitation_cache.reserve(visitation_cache.size());
	for (auto *n : visitation_cache)
	{
		if (n->immediate_post_dominator == from)
		{
			if (n->fake_succ.empty())
			{
				n->immediate_post_dominator = nullptr;
				// Ignore any infinite continue blocks.
				// They wreak havoc in post-dominance analysis.
				linear_visitation_cache.push_back(n);
			}
			else
			{
				// Infinite loop blocks must not be traversed again.
n->immediate_post_dominator = to; } } } // Will recompute everything that was cleared out. // Compute later nodes first. This way we avoid a potential recursive loop. std::sort(linear_visitation_cache.begin(), linear_visitation_cache.end(), [](const CFGNode *a, const CFGNode *b) { return a->forward_post_visit_order < b->forward_post_visit_order; }); for (auto *n : linear_visitation_cache) if (!n->immediate_post_dominator) n->recompute_immediate_post_dominator(); dominator->recompute_immediate_post_dominator(); } void CFGStructurizer::traverse_dominated_blocks_and_rewrite_branch(CFGNode *dominator, CFGNode *from, CFGNode *to) { traverse_dominated_blocks_and_rewrite_branch(dominator, from, to, [](const CFGNode *node) -> bool { return true; }, {}); } } // namespace dxil_spv ================================================ FILE: cfg_structurizer.hpp ================================================ /* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #pragma once #include "thread_local_allocator.hpp" #include "ir.hpp" #include #include namespace dxil_spv { class BlockEmissionInterface; class SPIRVModule; struct CFGNode; class CFGNodePool; class BlockEmissionInterface { public: virtual ~BlockEmissionInterface() = default; virtual void emit_basic_block(CFGNode *node) = 0; virtual void register_block(CFGNode *node) = 0; }; class CFGStructurizer { public: CFGStructurizer(CFGNode *entry, CFGNodePool &pool, SPIRVModule &module); bool run(); bool run_trivial(); void traverse(BlockEmissionInterface &iface); CFGNode *get_entry_block() const; bool rewrite_rov_lock_region(); void rewrite_auto_group_shared_barrier(); void flatten_subgroup_shuffles(); void fixup_loop_header_undef_phis(); private: CFGNode *entry_block; CFGNode *exit_block; CFGNodePool &pool; SPIRVModule &module; // For dominance analysis. Vector forward_post_visit_order; // For post-dominance analysis. 
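The traverse() contract above is simple: register_block() is called for every block (including merge and continue blocks) before any emit_basic_block() call, and blocks are emitted with dominating blocks first. As a minimal sketch (not part of the repository, assuming only the CFGNode members used above, id and name), a backend could look like this:

#include "cfg_structurizer.hpp"
#include "node.hpp"
#include <cstdio>

namespace dxil_spv
{
// Hypothetical backend: numbers blocks on registration, prints them on emission.
struct PrintingBackend final : BlockEmissionInterface
{
    void register_block(CFGNode *node) override
    {
        // traverse() resets each id to 0 before registering, so hand out fresh IDs once.
        if (node->id == 0)
            node->id = next_id++;
    }

    void emit_basic_block(CFGNode *node) override
    {
        // Invoked such that dominating blocks always come before dominated blocks.
        printf("block %u (%s)\n", unsigned(node->id), node->name.c_str());
    }

    unsigned next_id = 1;
};
} // namespace dxil_spv

Passing such a backend to CFGStructurizer::traverse() mirrors, in spirit, what SPIRVModule does when it emits the real SPIR-V function body.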
================================================
FILE: cfg_structurizer.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include "thread_local_allocator.hpp"
#include "ir.hpp"
#include <functional>
#include <stddef.h>

namespace dxil_spv
{
class BlockEmissionInterface;
class SPIRVModule;
struct CFGNode;
class CFGNodePool;

class BlockEmissionInterface
{
public:
    virtual ~BlockEmissionInterface() = default;
    virtual void emit_basic_block(CFGNode *node) = 0;
    virtual void register_block(CFGNode *node) = 0;
};

class CFGStructurizer
{
public:
    CFGStructurizer(CFGNode *entry, CFGNodePool &pool, SPIRVModule &module);
    bool run();
    bool run_trivial();
    void traverse(BlockEmissionInterface &iface);
    CFGNode *get_entry_block() const;
    bool rewrite_rov_lock_region();
    void rewrite_auto_group_shared_barrier();
    void flatten_subgroup_shuffles();
    void fixup_loop_header_undef_phis();

private:
    CFGNode *entry_block;
    CFGNode *exit_block;
    CFGNodePool &pool;
    SPIRVModule &module;

    // For dominance analysis.
    Vector<CFGNode *> forward_post_visit_order;
    // For post-dominance analysis.
    Vector<CFGNode *> backward_post_visit_order;
    Vector<uint32_t> reachability_bitset;
    unsigned reachability_stride = 0;
    UnorderedSet<const CFGNode *> reachable_nodes;
    UnorderedSet<const CFGNode *> structured_loop_merge_targets;

    void visit(CFGNode &entry);
    void visit_for_back_edge_analysis(CFGNode &entry);
    void backwards_visit();
    void backwards_visit(CFGNode &entry);
    void build_immediate_dominators();
    void build_immediate_post_dominators();
    void build_reachability();
    void visit_reachability(const CFGNode &node);
    bool query_reachability(const CFGNode &from, const CFGNode &to) const;
    bool structurize(unsigned pass);
    bool find_loops(unsigned pass);
    bool rewrite_complex_loop_exits(CFGNode *node, CFGNode *merge, Vector<CFGNode *> &dominated_exits);
    bool rewrite_transposed_loops();

    struct LoopAnalysis
    {
        Vector<CFGNode *> direct_exits;
        Vector<CFGNode *> inner_direct_exits;
        Vector<CFGNode *> dominated_exit;
        Vector<CFGNode *> inner_dominated_exit;
        Vector<CFGNode *> non_dominated_exit;
        Vector<CFGNode *> dominated_continue_exit;
    };
    LoopAnalysis analyze_loop(CFGNode *node) const;

    struct LoopMergeAnalysis
    {
        CFGNode *merge;
        CFGNode *weak_merge;
        CFGNode *dominated_merge;
        CFGNode *infinite_continue_ladder;
    };
    LoopMergeAnalysis analyze_loop_merge(CFGNode *node, const LoopAnalysis &analysis);

    void rewrite_transposed_loop_inner(CFGNode *node, CFGNode *impossible_merge_target,
                                       const LoopMergeAnalysis &analysis);
    void rewrite_transposed_loop_outer(CFGNode *node, CFGNode *impossible_merge_target,
                                       const LoopMergeAnalysis &analysis);
    static bool is_strictly_dominance_ordered(const CFGNode *a, const CFGNode *b, const CFGNode *c);
    bool is_reachability_ordered(const CFGNode *a, const CFGNode *b, const CFGNode *c);
    bool serialize_interleaved_merge_scopes_aggressive();
    bool serialize_interleaved_merge_scopes();
    bool serialize_interleaved_early_returns();
    static Vector> build_pdf_ranges(const Vector &candidates);
    static bool pdf_ranges_have_strict_dominance_ordering(const Vector> &candidates);
    void filter_serialization_candidates(Vector<CFGNode *> &candidates) const;
    void split_merge_scopes();
    bool is_rewind_candidate_split_node(const Vector<CFGNode *> &visited_orphans, CFGNode *node,
                                        CFGNode *candidate) const;
    bool is_trivially_no_split_node(CFGNode *node) const;
    void eliminate_degenerate_blocks();
    static bool ladder_chain_has_phi_dependencies(const CFGNode *chain, const CFGNode *incoming);
    void duplicate_impossible_merge_constructs();
    void duplicate_node(CFGNode *node);
    static bool can_duplicate_phis(const CFGNode *node);
    Operation *duplicate_op(Operation *op, UnorderedMap<spv::Id, spv::Id> &id_remap);
    void update_structured_loop_merge_targets();
    void find_selection_merges(unsigned pass);
    bool header_and_merge_block_have_entry_exit_relationship(const CFGNode *header, const CFGNode *merge) const;
    void fixup_broken_selection_merges(unsigned pass);
    bool selection_requires_structured_header(const CFGNode *node) const;

    enum class SwitchProgressMode
    {
        Done,
        SimpleModify,
        IterativeModify
    };
    SwitchProgressMode process_switch_blocks(unsigned pass);
    void hoist_switch_branches_to_frontier(CFGNode *node, CFGNode *merge, CFGNode *frontier);
    Operation *build_switch_case_equal_check(const CFGNode *header, CFGNode *insert_node,
                                             const Terminator::Case &case_label);
    CFGNode *create_switch_merge_ladder(CFGNode *header, CFGNode *merge);
    CFGNode *find_natural_switch_merge_block(CFGNode *node, CFGNode *post_dominator) const;
    const CFGNode *get_innermost_loop_header_for(const CFGNode *node) const;
    const CFGNode *get_innermost_loop_header_for(const CFGNode *header, const CFGNode *node) const;
    bool loop_exit_supports_infinite_loop(const CFGNode *header, const CFGNode *loop_exit) const;
    void split_merge_blocks();
    bool split_merge_blocks(CFGNode *node);
    void split_merge_blocks_and_visit_orphan_preds(Vector<CFGNode *> &visited, CFGNode *merge, CFGNode *node);
    void eliminate_degenerate_switch_merges();
    bool merge_candidate_is_on_breaking_path(const CFGNode *node) const;
    bool merge_candidate_is_inside_continue_construct(const CFGNode *node) const;
    bool continue_block_can_merge(CFGNode *node) const;
    static bool block_is_plain_continue(const CFGNode *node);
    static const CFGNode *scan_plain_continue_block(const CFGNode *node);

    // Create a new block. Rewrite all branches to node from blocks that are dominated by header to that block.
    // The new block then branches to node.
    CFGNode *create_ladder_block(CFGNode *header, CFGNode *node, const char *tag);
    CFGNode *get_target_break_block_for_inner_header(const CFGNode *node, size_t header_index);
    CFGNode *get_or_create_ladder_block(CFGNode *node, size_t header_index);
    CFGNode *build_enclosing_break_target_for_loop_ladder(CFGNode *&node, CFGNode *loop_ladder);
    CFGNode *build_ladder_block_for_escaping_edge_handling(CFGNode *node, CFGNode *header, CFGNode *loop_ladder,
                                                           CFGNode *target_header, CFGNode *full_break_target,
                                                           const UnorderedSet<const CFGNode *> &normal_preds);

    static CFGNode *find_common_post_dominator(const Vector<CFGNode *> &candidates);
    static CFGNode *find_common_post_dominator_with_ignored_break(Vector<CFGNode *> candidates,
                                                                  const CFGNode *break_node);
    CFGNode *find_break_target_for_selection_construct(CFGNode *idom, CFGNode *merge);

    bool control_flow_is_escaping(const CFGNode *node, const CFGNode *merge) const;
    bool control_flow_is_escaping_from_loop(const CFGNode *node, const CFGNode *merge) const;
    bool block_is_load_bearing(const CFGNode *node, const CFGNode *merge) const;
    static Vector<CFGNode *> isolate_structured_sorted(const CFGNode *header, const CFGNode *merge);
    static void isolate_structured(UnorderedSet<CFGNode *> &nodes, const CFGNode *header, const CFGNode *merge);
    static Vector<IncomingValue>::const_iterator find_incoming_value(const CFGNode *frontier_pred,
                                                                     const Vector<IncomingValue> &incoming);

    void rewrite_selection_breaks(CFGNode *header, CFGNode *ladder_to);

    enum class LoopExitType
    {
        Exit,
        Merge,
        Escape,
        MergeToInfiniteLoop,
        InnerLoopExit,
        InnerLoopMerge,
        InnerLoopFalsePositive
    };
    LoopExitType get_loop_exit_type(const CFGNode &header, const CFGNode &node) const;

    CFGNode *create_helper_pred_block(CFGNode *node);
    CFGNode *create_helper_succ_block(CFGNode *node);
    void reset_traversal();
    bool rewrite_invalid_loop_breaks();
    void recompute_cfg();
    void rewrite_multiple_back_edges();
    bool rewrite_impossible_back_edges();
    void compute_dominance_frontier();
    void compute_post_dominance_frontier();
    void create_continue_block_ladders();
    static void recompute_dominance_frontier(CFGNode *node);
    static void recompute_post_dominance_frontier(CFGNode *node);
    static void merge_to_succ(CFGNode *node, unsigned index);
    void retarget_pred_from(CFGNode *new_node, CFGNode *old_succ);
    void retarget_succ_from(CFGNode *new_node, CFGNode *old_pred);

    CFGNode *get_post_dominance_frontier_with_cfg_subset_that_reaches(const CFGNode *node, const CFGNode *must_reach,
                                                                      const CFGNode *must_reach_frontier) const;
    bool exists_path_in_cfg_without_intermediate_node(const CFGNode *start_block, const CFGNode *end_block,
                                                      const CFGNode *stop_block) const;

    struct PHINode
    {
        CFGNode *block;
        unsigned phi_index;
    };
    Vector<PHINode> phi_nodes;
    void insert_phi();
    void insert_phi(PHINode &node);
    void fixup_phi(PHINode &node);
    void cleanup_breaking_phi_constructs();
    bool block_is_breaking_phi_construct(const CFGNode *node) const;
    bool cleanup_breaking_return_constructs();
    void eliminate_node_link_preds_to_succ(CFGNode *node);
    void prune_dead_preds();
    void fixup_broken_value_dominance();
    UnorderedMap<spv::Id, CFGNode *> value_id_to_block;
    void log_cfg(const char *tag) const;
    void log_cfg_graphviz(const char *path) const;
    static bool can_complete_phi_insertion(const PHI &phi, const CFGNode *end_node);
    bool query_reachability_through_back_edges(const CFGNode &from, const CFGNode &to) const;
    bool query_reachability_split_loop_header(const CFGNode &from, const CFGNode &to, const CFGNode &end_node) const;
    bool phi_frontier_makes_forward_progress(const PHI &phi, const CFGNode *frontier, const CFGNode *end_node) const;

    void traverse_dominated_blocks_and_rewrite_branch(CFGNode *dominator, CFGNode *from, CFGNode *to);
    template <typename Op>
    void traverse_dominated_blocks_and_rewrite_branch(CFGNode *dominator, CFGNode *from, CFGNode *to, const Op &op,
                                                      const Vector<const CFGNode *> &barrier);
    template <typename Op>
    void traverse_dominated_blocks_and_rewrite_branch(const CFGNode *dominator, CFGNode *candidate, CFGNode *from,
                                                      CFGNode *to, const Op &op,
                                                      const Vector<const CFGNode *> &barrier,
                                                      UnorderedSet<CFGNode *> &visitation_cache);
    CFGNode *transpose_code_path_through_ladder_block(CFGNode *header, CFGNode *merge, CFGNode *succ);
    void rewrite_ladder_conditional_branch_from_incoming_blocks(
        CFGNode *ladder, CFGNode *true_block, CFGNode *false_block,
        const std::function &path_cb, const String &name);
    void propagate_branch_control_hints();
    void remove_unused_ssa();

    bool find_single_entry_exit_lock_region(CFGNode *&idom, CFGNode *&pdom, const Vector<CFGNode *> &rov_blocks);
    bool execution_path_is_single_entry_and_dominates_exit(CFGNode *idom, CFGNode *pdom);
    void collect_and_dispatch_control_flow(CFGNode *common_idom, CFGNode *common_pdom,
                                           const Vector<CFGNode *> &constructs,
                                           bool collect_all_code_paths_to_pdom, bool allow_crossing_branches);
    void collect_and_dispatch_control_flow_from_anchor(CFGNode *anchor, const Vector<CFGNode *> &constructs);
    void sink_ssa_constructs();
    void sink_ssa_constructs_run(bool dry_run);
};
} // namespace dxil_spv



================================================
FILE: checkout_dxc.sh
================================================
#!/bin/bash

DXC_REV=a9d33d3500d37bd24c10288c76aca8e1c948d4a2

if [ -d external/DirectXShaderCompiler ]; then
    echo "Updating DirectXShaderCompiler to revision $DXC_REV."
    cd external/DirectXShaderCompiler
    git fetch origin
    git checkout $DXC_REV
    git submodule update --init
else
    echo "Cloning DirectXShaderCompiler revision $DXC_REV."
    mkdir -p external
    cd external
    git clone https://github.com/Microsoft/DirectXShaderCompiler.git
    cd DirectXShaderCompiler
    git checkout $DXC_REV
    git submodule update --init
fi



================================================
FILE: checkout_llvm.sh
================================================
#!/bin/bash

LLVM_REV=2c4ca6832fa6b306ee6a

if [ -z "$PROTOCOL" ]; then
    PROTOCOL=git
fi

echo "Using protocol \"$PROTOCOL\" for checking out repositories. If this is problematic, try PROTOCOL=https $0."

if [ -d external/llvm ]; then
    echo "Updating LLVM to revision $LLVM_REV."
    cd external/llvm
    git fetch origin
    git checkout $LLVM_REV
else
    echo "Cloning LLVM revision $LLVM_REV."
    mkdir -p external
    cd external
    git clone $PROTOCOL://github.com/llvm-mirror/llvm.git
    cd llvm
    git checkout $LLVM_REV
fi


================================================
FILE: copy_reference_shaders.py
================================================
#!/usr/bin/env python3
# Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
#
# SPDX-License-Identifier: MIT
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import sys
import os
import argparse
import shutil
import hashlib

def hashstr(path):
    with open(path, 'rb') as f:
        bytes = f.read()
    if len(bytes) < 4:
        print('Skipping file', path, 'due to size < 4.')
        return None
    if bytes[0:4] != b'DXBC':
        print('Skipping broken file', path)
        return None
    result = hashlib.sha1(bytes).hexdigest()
    return result

def add_tags(path, noglsl):
    if not noglsl:
        return path
    else:
        return path[:-4] + 'noglsl.dxil'

def copy_reference_shader(output_dir, input_path, raw, noglsl):
    modified_input_path = add_tags(input_path, noglsl)
    if raw:
        shutil.copy(input_path, os.path.join(output_dir, os.path.basename(modified_input_path)))
    else:
        name = hashstr(input_path)
        if name is not None:
            shutil.copy(input_path, os.path.join(output_dir, name + ('.noglsl' if noglsl else '') + '.dxil'))

def main():
    parser = argparse.ArgumentParser(description = 'Script for copying VKD3D shader dumps to regression suite.')
    parser.add_argument('--dxil', help = 'Folder containing a bunch of .dxil shaders.')
    parser.add_argument('--dxbc', help = 'Folder containing a bunch of .dxbc shaders.')
    parser.add_argument('--output', required = True, help = 'Output directory.')
    parser.add_argument('--raw', help = 'Skip hashing. Files must be in format $hash.dxil', action = 'store_true')
    parser.add_argument('--noglsl', help = 'Add .noglsl. tag.', action = 'store_true')
    args = parser.parse_args()

    if args.dxil is not None:
        for root, dirs, files in os.walk(args.dxil):
            for file in files:
                ext = os.path.splitext(file)[1]
                if ext == '.dxil':
                    print('Copying DXIL reference file:', file)
                    copy_reference_shader(args.output, os.path.join(root, file), args.raw, args.noglsl)

    if args.dxbc is not None:
        for root, dirs, files in os.walk(args.dxbc):
            for file in files:
                ext = os.path.splitext(file)[1]
                if ext == '.dxbc':
                    print('Copying DXBC reference file:', file)
                    copy_reference_shader(args.output, os.path.join(root, file), args.raw, args.noglsl)

if __name__ == '__main__':
    main()



================================================
FILE: debug/logging.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "logging.hpp"

namespace dxil_spv
{
static thread_local LoggingCallback log_callback;
static thread_local void *log_userdata;

void set_thread_log_callback(LoggingCallback callback, void *userdata)
{
    log_callback = callback;
    log_userdata = userdata;
}

LoggingCallback get_thread_log_callback()
{
    return log_callback;
}

void *get_thread_log_callback_userdata()
{
    return log_userdata;
}
}



================================================
FILE: debug/logging.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include <stdio.h>
#include <stdint.h>

#if defined(_MSC_VER)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#define LOGE_INNER(...) \
    do \
    { \
        fprintf(stderr, "[ERROR]: " __VA_ARGS__); \
        fflush(stderr); \
        char buffer[4096]; \
        snprintf(buffer, sizeof(buffer), "[ERROR]: " __VA_ARGS__); \
        OutputDebugStringA(buffer); \
    } while (false)

#define LOGW_INNER(...) \
    do \
    { \
        fprintf(stderr, "[WARN]: " __VA_ARGS__); \
        fflush(stderr); \
        char buffer[4096]; \
        snprintf(buffer, sizeof(buffer), "[WARN]: " __VA_ARGS__); \
        OutputDebugStringA(buffer); \
    } while (false)

#define LOGI_INNER(...) \
    do \
    { \
        fprintf(stderr, "[INFO]: " __VA_ARGS__); \
        fflush(stderr); \
        char buffer[4096]; \
        snprintf(buffer, sizeof(buffer), "[INFO]: " __VA_ARGS__); \
        OutputDebugStringA(buffer); \
    } while (false)
#elif defined(ANDROID)
#include <android/log.h>
#define LOGE_INNER(...) __android_log_print(ANDROID_LOG_ERROR, "dxil-spirv", __VA_ARGS__)
#define LOGW_INNER(...) __android_log_print(ANDROID_LOG_WARN, "dxil-spirv", __VA_ARGS__)
#define LOGI_INNER(...) __android_log_print(ANDROID_LOG_INFO, "dxil-spirv", __VA_ARGS__)
#else
#define LOGE_INNER(...) \
    do \
    { \
        fprintf(stderr, "[ERROR]: " __VA_ARGS__); \
        fflush(stderr); \
    } while (false)

#define LOGW_INNER(...) \
    do \
    { \
        fprintf(stderr, "[WARN]: " __VA_ARGS__); \
        fflush(stderr); \
    } while (false)

#define LOGI_INNER(...) \
    do \
    { \
        fprintf(stderr, "[INFO]: " __VA_ARGS__); \
        fflush(stderr); \
    } while (false)
#endif

namespace dxil_spv
{
enum class LogLevel : uint32_t
{
    Debug = 0,
    Warn = 1,
    Error = 2
};

using LoggingCallback = void (*)(void *, LogLevel, const char *);
void set_thread_log_callback(LoggingCallback callback, void *userdata);
LoggingCallback get_thread_log_callback();
void *get_thread_log_callback_userdata();
}

#define LOGI(...) do { \
    if (auto *cb = ::dxil_spv::get_thread_log_callback()) \
    { \
        char buffer[4096]; \
        snprintf(buffer, sizeof(buffer), __VA_ARGS__); \
        cb(::dxil_spv::get_thread_log_callback_userdata(), ::dxil_spv::LogLevel::Debug, buffer); \
    } \
    else \
    { \
        LOGI_INNER(__VA_ARGS__); \
    } \
} while(0)

#define LOGW(...) do { \
    if (auto *cb = ::dxil_spv::get_thread_log_callback()) \
    { \
        char buffer[4096]; \
        snprintf(buffer, sizeof(buffer), __VA_ARGS__); \
        cb(::dxil_spv::get_thread_log_callback_userdata(), ::dxil_spv::LogLevel::Warn, buffer); \
    } \
    else \
    { \
        LOGW_INNER(__VA_ARGS__); \
    } \
} while(0)

#define LOGE(...) do { \
    if (auto *cb = ::dxil_spv::get_thread_log_callback()) \
    { \
        char buffer[4096]; \
        snprintf(buffer, sizeof(buffer), __VA_ARGS__); \
        cb(::dxil_spv::get_thread_log_callback_userdata(), ::dxil_spv::LogLevel::Error, buffer); \
    } \
    else \
    { \
        LOGE_INNER(__VA_ARGS__); \
    } \
} while(0)
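A small usage sketch (not from the repository): forwarding dxil-spirv log output into a caller-owned sink via the thread-local callback declared above. The LogSink type and capture_log function are illustrative names; the callback is per-thread, so each worker thread must install its own.

#include "logging.hpp"
#include <string>

struct LogSink
{
    std::string text;
};

static void capture_log(void *userdata, dxil_spv::LogLevel level, const char *message)
{
    // message is the fully formatted string built by the LOGI/LOGW/LOGE macros
    // (callers conventionally include the trailing '\n' themselves).
    auto *sink = static_cast<LogSink *>(userdata);
    if (level != dxil_spv::LogLevel::Debug)
        sink->text += message;
}

void example()
{
    LogSink sink;
    dxil_spv::set_thread_log_callback(capture_log, &sink);
    LOGW("Something suspicious: %d\n", 42); // Routed to capture_log instead of stderr.
    dxil_spv::set_thread_log_callback(nullptr, nullptr); // Restore default stderr logging.
}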


================================================
FILE: descriptor_qa.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "descriptor_qa.hpp"
#include "spirv_module.hpp"
#include "SpvBuilder.h"
#include "logging.hpp"

namespace dxil_spv
{
static spv::Id build_descriptor_qa_heap_buffer_type(spv::Builder &builder)
{
    Vector<spv::Id> member_types;

    // DescriptorHeapQAData {
    //   uint descriptor_count;
    //   uint heap_index;
    //   uvec3 cookies_descriptor_info[];
    // }

    spv::Id u32_type = builder.makeUintType(32);
    spv::Id uvec3_type = builder.makeVectorType(u32_type, 3);
    spv::Id uvec3_arr_type = builder.makeRuntimeArray(uvec3_type);
    builder.addDecoration(uvec3_arr_type, spv::DecorationArrayStride, 12);

    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(uvec3_arr_type);
    spv::Id id = builder.makeStructType(member_types, "DescriptorHeapQAData");

    const auto set_info = [&](DescriptorQAHeapMembers member, int offset, const char *name) {
        builder.addMemberDecoration(id, int(member), spv::DecorationOffset, offset);
        builder.addMemberName(id, int(member), name);
    };

    set_info(DescriptorQAHeapMembers::DescriptorCount, 0, "descriptor_count");
    set_info(DescriptorQAHeapMembers::HeapIndex, 4, "heap_index");
    set_info(DescriptorQAHeapMembers::CookiesDescriptorInfo, 8, "cookies_descriptor_info");
    builder.addDecoration(id, spv::DecorationBlock);
    return id;
}

static spv::Id build_descriptor_global_buffer_type(spv::Builder &builder)
{
    Vector<spv::Id> member_types;

    // DescriptorHeapQAGlobalData {
    //   uvec2 failed_shader_hash;
    //   uint failed_offset;
    //   uint failed_heap;
    //   uint failed_cookie;
    //   uint fault_atomic;
    //   uint failed_instruction;
    //   uint failed_descriptor_type_mask;
    //   uint actual_descriptor_type_mask;
    //   uint fault_type;
    //   uint va_map_timestamp;
    //   uint live_status_table[];
    // }

    spv::Id u32_type = builder.makeUintType(32);
    spv::Id uvec2_type = builder.makeVectorType(u32_type, 2);
    spv::Id u32_arr_type = builder.makeRuntimeArray(u32_type);
    builder.addDecoration(u32_arr_type, spv::DecorationArrayStride, 4);

    member_types.push_back(uvec2_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_type);
    member_types.push_back(u32_arr_type);
    spv::Id id = builder.makeStructType(member_types, "DescriptorHeapGlobalQAData");

    const auto set_info = [&](DescriptorQAGlobalMembers member, int offset, const char *name) {
        builder.addMemberDecoration(id, int(member), spv::DecorationOffset, offset);
        builder.addMemberName(id, int(member), name);
    };

    set_info(DescriptorQAGlobalMembers::FailedShaderHash, 0, "failed_shader_hash");
    set_info(DescriptorQAGlobalMembers::FailedOffset, 8, "failed_offset");
    set_info(DescriptorQAGlobalMembers::FailedHeap, 12, "failed_heap");
    set_info(DescriptorQAGlobalMembers::FailedCookie, 16, "failed_cookie");
    set_info(DescriptorQAGlobalMembers::FaultAtomic, 20, "fault_atomic");
    set_info(DescriptorQAGlobalMembers::FailedInstruction, 24, "failed_instruction");
    set_info(DescriptorQAGlobalMembers::FailedDescriptorTypeMask, 28, "failed_descriptor_type_mask");
    set_info(DescriptorQAGlobalMembers::ActualDescriptorTypeMask, 32, "actual_descriptor_type_mask");
    set_info(DescriptorQAGlobalMembers::FaultType, 36, "fault_type");
    set_info(DescriptorQAGlobalMembers::VAMapTimestamp, 40, "va_map_timestamp");
    set_info(DescriptorQAGlobalMembers::LiveStatusTable, 44, "live_status_table");
    builder.addDecoration(id, spv::DecorationBlock);
    return id;
}

static spv::Id build_ssbo_load(spv::Builder &builder, spv::Id value_type, spv::Id ssbo_id, uint32_t member)
{
    spv::Id ptr_id = builder.makePointer(spv::StorageClassStorageBuffer, value_type);
    auto chain = std::make_unique<spv::Instruction>(builder.getUniqueId(), ptr_id, spv::OpAccessChain);
    chain->addIdOperand(ssbo_id);
    chain->addIdOperand(builder.makeUintConstant(member));
    auto load = std::make_unique<spv::Instruction>(builder.getUniqueId(), value_type, spv::OpLoad);
    load->addIdOperand(chain->getResultId());
    spv::Id result_id = load->getResultId();
    builder.getBuildPoint()->addInstruction(std::move(chain));
    builder.getBuildPoint()->addInstruction(std::move(load));
    return result_id;
}

static void build_ssbo_store(spv::Builder &builder, spv::Id value_type, spv::Id ssbo_id, uint32_t member,
                             spv::Id value_id)
{
    spv::Id ptr_id = builder.makePointer(spv::StorageClassStorageBuffer, value_type);
    auto chain = std::make_unique<spv::Instruction>(builder.getUniqueId(), ptr_id, spv::OpAccessChain);
    chain->addIdOperand(ssbo_id);
    chain->addIdOperand(builder.makeUintConstant(member));
    auto store = std::make_unique<spv::Instruction>(spv::OpStore);
    store->addIdOperand(chain->getResultId());
    store->addIdOperand(value_id);
    if (builder.hasCapability(spv::CapabilityVulkanMemoryModel))
        store->addImmediateOperand(spv::MemoryAccessNonPrivatePointerMask);
    builder.getBuildPoint()->addInstruction(std::move(chain));
    builder.getBuildPoint()->addInstruction(std::move(store));
}

static spv::Id build_ssbo_load_array(spv::Builder &builder, spv::Id value_type, spv::Id ssbo_id, uint32_t member,
                                     spv::Id offset)
{
    spv::Id ptr_id = builder.makePointer(spv::StorageClassStorageBuffer, value_type);
    auto chain = std::make_unique<spv::Instruction>(builder.getUniqueId(), ptr_id, spv::OpAccessChain);
    chain->addIdOperand(ssbo_id);
    chain->addIdOperand(builder.makeUintConstant(member));
    chain->addIdOperand(offset);
    auto load = std::make_unique<spv::Instruction>(builder.getUniqueId(), value_type, spv::OpLoad);
    load->addIdOperand(chain->getResultId());
    spv::Id result_id = load->getResultId();
    builder.getBuildPoint()->addInstruction(std::move(chain));
    builder.getBuildPoint()->addInstruction(std::move(load));
    return result_id;
}

static void build_cookie_descriptor_info_split(spv::Builder &builder, spv::Id composite_id, spv::Id &cookie_id,
                                               spv::Id &cookie_shifted_id, spv::Id &cookie_masked_id,
                                               spv::Id &descriptor_timestamp_id, spv::Id &descriptor_info_id)
{
    spv::Id u32_type = builder.makeUintType(32);

    auto *cookie = builder.addInstruction(u32_type, spv::OpCompositeExtract);
    cookie->addIdOperand(composite_id);
    cookie->addImmediateOperand(0);

    auto *descriptor_timestamp = builder.addInstruction(u32_type, spv::OpCompositeExtract);
    descriptor_timestamp->addIdOperand(composite_id);
    descriptor_timestamp->addImmediateOperand(1);

    auto *descriptor_type = builder.addInstruction(u32_type, spv::OpCompositeExtract);
    descriptor_type->addIdOperand(composite_id);
    descriptor_type->addImmediateOperand(2);

    auto *shifted = builder.addInstruction(u32_type, spv::OpShiftRightLogical);
    shifted->addIdOperand(cookie->getResultId());
    shifted->addIdOperand(builder.makeUintConstant(5));

    auto *masked = builder.addInstruction(u32_type, spv::OpBitwiseAnd);
    masked->addIdOperand(cookie->getResultId());
    masked->addIdOperand(builder.makeUintConstant(31));

    cookie_id = cookie->getResultId();
    descriptor_timestamp_id = descriptor_timestamp->getResultId();
    descriptor_info_id = descriptor_type->getResultId();
    cookie_shifted_id = shifted->getResultId();
    cookie_masked_id = masked->getResultId();
}

static spv::Id build_live_check(spv::Builder &builder, spv::Id status_id, spv::Id bit_id)
{
    spv::Id u32_type = builder.makeUintType(32);

    auto shift_up = std::make_unique<spv::Instruction>(builder.getUniqueId(), u32_type, spv::OpShiftLeftLogical);
    shift_up->addIdOperand(builder.makeUintConstant(1));
    shift_up->addIdOperand(bit_id);

    auto mask = std::make_unique<spv::Instruction>(builder.getUniqueId(), u32_type, spv::OpBitwiseAnd);
    mask->addIdOperand(status_id);
    mask->addIdOperand(shift_up->getResultId());

    auto cond = std::make_unique<spv::Instruction>(builder.getUniqueId(), builder.makeBoolType(), spv::OpINotEqual);
    cond->addIdOperand(mask->getResultId());
    cond->addIdOperand(builder.makeUintConstant(0));

    spv::Id res = cond->getResultId();
    builder.getBuildPoint()->addInstruction(std::move(shift_up));
    builder.getBuildPoint()->addInstruction(std::move(mask));
    builder.getBuildPoint()->addInstruction(std::move(cond));
    return res;
}

static spv::Id build_binary_op(spv::Builder &builder, spv::Id type, spv::Op opcode, spv::Id a, spv::Id b)
{
    auto op = std::make_unique<spv::Instruction>(builder.getUniqueId(), type, opcode);
    op->addIdOperand(a);
    op->addIdOperand(b);
    spv::Id ret = op->getResultId();
    builder.getBuildPoint()->addInstruction(std::move(op));
    return ret;
}

static void build_ssbo_barrier(spv::Builder &builder)
{
    auto barrier = std::make_unique<spv::Instruction>(spv::OpMemoryBarrier);
    barrier->addIdOperand(builder.getAtomicDeviceScopeId());
    barrier->addIdOperand(builder.makeUintConstant(spv::MemorySemanticsUniformMemoryMask |
                                                   spv::MemorySemanticsAcquireReleaseMask));
    builder.getBuildPoint()->addInstruction(std::move(barrier));
}

static void build_descriptor_qa_fault_report(SPIRVModule &module, spv::Id &func_id, spv::Id &buffer_id)
{
    auto &builder = module.get_builder();
    spv::Id global_buffer_type_id = build_descriptor_global_buffer_type(builder);
    spv::Id descriptor_qa_global_buffer_id =
        module.create_variable(spv::StorageClassStorageBuffer, global_buffer_type_id, "QAGlobalData");
    buffer_id = descriptor_qa_global_buffer_id;
    builder.addDecoration(descriptor_qa_global_buffer_id, spv::DecorationDescriptorSet,
                          module.get_descriptor_qa_info().global_desc_set);
    builder.addDecoration(descriptor_qa_global_buffer_id, spv::DecorationBinding,
                          module.get_descriptor_qa_info().global_binding);

    auto *current_build_point = builder.getBuildPoint();
    spv::Block *entry = nullptr;
    Vector<spv::Id> param_types(7, builder.makeUintType(32));
    auto *func = builder.makeFunctionEntry(spv::NoPrecision, builder.makeVoidType(), "descriptor_qa_report_fault",
                                           param_types, {}, &entry);
    func_id = func->getId();

    spv::Id fault_type_id = func->getParamId(0);
    spv::Id heap_offset_id = func->getParamId(1);
    spv::Id cookie_id = func->getParamId(2);
    spv::Id heap_id = func->getParamId(3);
    spv::Id descriptor_type_id = func->getParamId(4);
    spv::Id actual_descriptor_type_id = func->getParamId(5);
    spv::Id instruction_id = func->getParamId(6);
    builder.addName(fault_type_id, "fault_type");
    builder.addName(heap_offset_id, "heap_offset");
    builder.addName(cookie_id, "cookie");
    builder.addName(heap_id, "heap_index");
    builder.addName(descriptor_type_id, "descriptor_type");
    builder.addName(actual_descriptor_type_id, "actual_descriptor_type");
    builder.addName(instruction_id, "instruction");

    spv::Id u32_type = builder.makeUintType(32);
    spv::Id u32_ptr_type = builder.makePointer(spv::StorageClassStorageBuffer, u32_type);

    auto chain = std::make_unique<spv::Instruction>(builder.getUniqueId(), u32_ptr_type, spv::OpAccessChain);
    chain->addIdOperand(descriptor_qa_global_buffer_id);
    chain->addIdOperand(builder.makeUintConstant(uint32_t(DescriptorQAGlobalMembers::FaultAtomic)));

    auto increment = std::make_unique<spv::Instruction>(builder.getUniqueId(), u32_type, spv::OpAtomicIAdd);
    increment->addIdOperand(chain->getResultId());
    increment->addIdOperand(builder.getAtomicDeviceScopeId());
    increment->addIdOperand(builder.makeUintConstant(0));
    increment->addIdOperand(builder.makeUintConstant(1));

    auto check = std::make_unique<spv::Instruction>(builder.getUniqueId(), builder.makeBoolType(), spv::OpIEqual);
    check->addIdOperand(increment->getResultId());
    check->addIdOperand(builder.makeUintConstant(0));
    spv::Id check_id = check->getResultId();

    auto *true_block = new spv::Block(builder.getUniqueId(), *func);
    auto *false_block = new spv::Block(builder.getUniqueId(), *func);

    builder.setBuildPoint(entry);
    entry->addInstruction(std::move(chain));
    entry->addInstruction(std::move(increment));
    entry->addInstruction(std::move(check));
    builder.createSelectionMerge(false_block, 0);
    builder.createConditionalBranch(check_id, true_block, false_block);

    builder.setBuildPoint(true_block);
    {
        build_ssbo_store(builder, u32_type, descriptor_qa_global_buffer_id,
                         uint32_t(DescriptorQAGlobalMembers::FailedCookie), cookie_id);
        build_ssbo_store(builder, u32_type, descriptor_qa_global_buffer_id,
                         uint32_t(DescriptorQAGlobalMembers::FailedOffset), heap_offset_id);
        build_ssbo_store(builder, u32_type, descriptor_qa_global_buffer_id,
                         uint32_t(DescriptorQAGlobalMembers::FailedHeap), heap_id);
        build_ssbo_store(builder, u32_type, descriptor_qa_global_buffer_id,
                         uint32_t(DescriptorQAGlobalMembers::FailedDescriptorTypeMask), descriptor_type_id);
        build_ssbo_store(builder, u32_type, descriptor_qa_global_buffer_id,
                         uint32_t(DescriptorQAGlobalMembers::ActualDescriptorTypeMask), actual_descriptor_type_id);
        build_ssbo_store(builder, u32_type, descriptor_qa_global_buffer_id,
                         uint32_t(DescriptorQAGlobalMembers::FailedInstruction), instruction_id);

        spv::Id uvec2_type = builder.makeVectorType(u32_type, 2);
        Vector<spv::Id> comps;
        comps.push_back(builder.makeUintConstant(uint32_t(module.get_descriptor_qa_info().shader_hash)));
        comps.push_back(builder.makeUintConstant(uint32_t(module.get_descriptor_qa_info().shader_hash >> 32u)));
        spv::Id hash_id = builder.makeCompositeConstant(uvec2_type, comps);
        build_ssbo_store(builder, uvec2_type, descriptor_qa_global_buffer_id,
                         uint32_t(DescriptorQAGlobalMembers::FailedShaderHash), hash_id);

        // Device memory barrier here so that if host observed fault_type != 0,
        // we're certain that the other values are correct as well.
        build_ssbo_barrier(builder);

        build_ssbo_store(builder, u32_type, descriptor_qa_global_buffer_id,
                         uint32_t(DescriptorQAGlobalMembers::FaultType), fault_type_id);
        builder.createBranch(false_block);
    }

    builder.setBuildPoint(false_block);
    builder.makeReturn(false);
    builder.setBuildPoint(current_build_point);
}

spv::Id build_descriptor_qa_check_function(SPIRVModule &module)
{
    auto &builder = module.get_builder();
    spv::Id fault_func_id, global_buffer_id;
    build_descriptor_qa_fault_report(module, fault_func_id, global_buffer_id);

    spv::Id heap_buffer_type_id = build_descriptor_qa_heap_buffer_type(builder);
    spv::Id descriptor_qa_heap_buffer_id =
        module.create_variable(spv::StorageClassStorageBuffer, heap_buffer_type_id, "QAHeapData");
    builder.addDecoration(descriptor_qa_heap_buffer_id, spv::DecorationDescriptorSet,
                          module.get_descriptor_qa_info().heap_desc_set);
    builder.addDecoration(descriptor_qa_heap_buffer_id, spv::DecorationBinding,
                          module.get_descriptor_qa_info().heap_binding);
    builder.addDecoration(descriptor_qa_heap_buffer_id, spv::DecorationNonWritable);
    auto heap_buffer_id = descriptor_qa_heap_buffer_id;

    auto *current_build_point = builder.getBuildPoint();
    spv::Block *entry = nullptr;
    Vector<spv::Id> param_types(3, builder.makeUintType(32));
    auto *func = builder.makeFunctionEntry(spv::NoPrecision, builder.makeUintType(32), "descriptor_qa_check",
                                           param_types, {}, &entry);
    builder.setBuildPoint(entry);

    spv::Id offset_id = func->getParamId(0);
    spv::Id descriptor_type_id = func->getParamId(1);
    spv::Id instruction_id = func->getParamId(2);
    builder.addName(offset_id, "heap_offset");
    builder.addName(descriptor_type_id, "descriptor_type_mask");
    builder.addName(instruction_id, "instruction");

    spv::Id descriptor_count_id = build_ssbo_load(builder, builder.makeUintType(32), heap_buffer_id,
                                                  uint32_t(DescriptorQAHeapMembers::DescriptorCount));
    spv::Id fallback_offset_id = descriptor_count_id;
    spv::Id heap_id = build_ssbo_load(builder, builder.makeUintType(32), heap_buffer_id,
                                      uint32_t(DescriptorQAHeapMembers::HeapIndex));
    spv::Id timestamp_id = build_ssbo_load(builder, builder.makeUintType(32), global_buffer_id,
                                           uint32_t(DescriptorQAGlobalMembers::VAMapTimestamp));
    spv::Id cookie_descriptor_info =
        build_ssbo_load_array(builder, builder.makeVectorType(builder.makeUintType(32), 3), heap_buffer_id,
                              uint32_t(DescriptorQAHeapMembers::CookiesDescriptorInfo), offset_id);

    spv::Id cookie_id;
    spv::Id cookie_shifted_id;
    spv::Id cookie_mask_id;
    spv::Id descriptor_timestamp_id;
    spv::Id descriptor_info_id;
    build_cookie_descriptor_info_split(builder, cookie_descriptor_info, cookie_id, cookie_shifted_id,
                                       cookie_mask_id, descriptor_timestamp_id, descriptor_info_id);

    spv::Id live_status_id = build_ssbo_load_array(builder, builder.makeUintType(32), global_buffer_id,
                                                   uint32_t(DescriptorQAGlobalMembers::LiveStatusTable),
                                                   cookie_shifted_id);
    spv::Id live_status_cond_id = build_live_check(builder, live_status_id, cookie_mask_id);

    spv::Id type_cond_id = build_binary_op(builder, builder.makeUintType(32), spv::OpBitwiseAnd,
                                           descriptor_info_id, descriptor_type_id);
    type_cond_id = build_binary_op(builder, builder.makeBoolType(), spv::OpIEqual, type_cond_id, descriptor_type_id);

    spv::Id out_of_range_id = build_binary_op(builder, builder.makeBoolType(), spv::OpUGreaterThanEqual,
                                              offset_id, descriptor_count_id);

    // First check: descriptor index is in range of heap.
    auto *range_check = builder.addInstruction(builder.makeUintType(32), spv::OpSelect);
    range_check->addIdOperand(out_of_range_id);
    range_check->addIdOperand(builder.makeUintConstant(DESCRIPTOR_QA_FAULT_INDEX_OUT_OF_RANGE_BIT));
    range_check->addIdOperand(builder.makeUintConstant(0u));

    // Second: Check if type matches.
    auto *type_check = builder.addInstruction(builder.makeUintType(32), spv::OpSelect);
    type_check->addIdOperand(type_cond_id);
    type_check->addIdOperand(builder.makeUintConstant(0u));
    type_check->addIdOperand(builder.makeUintConstant(DESCRIPTOR_QA_FAULT_INVALID_TYPE_BIT));

    // Third: Check if cookie is alive.
    auto *alive_check = builder.addInstruction(builder.makeUintType(32), spv::OpSelect);
    alive_check->addIdOperand(live_status_cond_id);
    alive_check->addIdOperand(builder.makeUintConstant(0u));
    alive_check->addIdOperand(builder.makeUintConstant(DESCRIPTOR_QA_FAULT_RESOURCE_DESTROYED_BIT));

    // Fourth: Check if the view was created before GPU submission happened.
    auto *time_check_cond = builder.addInstruction(builder.makeBoolType(), spv::OpUGreaterThanEqual);
    time_check_cond->addIdOperand(timestamp_id);
    time_check_cond->addIdOperand(descriptor_timestamp_id);

    auto *time_check = builder.addInstruction(builder.makeUintType(32), spv::OpSelect);
    time_check->addIdOperand(time_check_cond->getResultId());
    time_check->addIdOperand(builder.makeUintConstant(0u));
    time_check->addIdOperand(builder.makeUintConstant(DESCRIPTOR_QA_FAULT_VA_TIMESTAMP_INVALID_BIT));

    auto *merge_check0 = builder.addInstruction(builder.makeUintType(32), spv::OpBitwiseOr);
    auto *merge_check1 = builder.addInstruction(builder.makeUintType(32), spv::OpBitwiseOr);
    auto *merge_check2 = builder.addInstruction(builder.makeUintType(32), spv::OpBitwiseOr);
    merge_check0->addIdOperand(range_check->getResultId());
    merge_check0->addIdOperand(type_check->getResultId());
    merge_check1->addIdOperand(merge_check0->getResultId());
    merge_check1->addIdOperand(alive_check->getResultId());
    merge_check2->addIdOperand(merge_check1->getResultId());
    merge_check2->addIdOperand(time_check->getResultId());

    auto *fault_cond = builder.addInstruction(builder.makeBoolType(), spv::OpINotEqual);
    fault_cond->addIdOperand(merge_check2->getResultId());
    fault_cond->addIdOperand(builder.makeUintConstant(0u));
    spv::Id fault_type_id = merge_check2->getResultId();
    spv::Id fault_cond_id = fault_cond->getResultId();

    auto *fault_block = new spv::Block(builder.getUniqueId(), *func);
    auto *correct_block = new spv::Block(builder.getUniqueId(), *func);
    builder.createSelectionMerge(correct_block, 0);
    builder.createConditionalBranch(fault_cond_id, fault_block, correct_block);

    {
        builder.setBuildPoint(fault_block);
        auto *call = builder.addInstruction(builder.makeVoidType(), spv::OpFunctionCall);
        call->addIdOperand(fault_func_id);
        call->addIdOperand(fault_type_id);
        call->addIdOperand(offset_id);
        call->addIdOperand(cookie_id);
        call->addIdOperand(heap_id);
        call->addIdOperand(descriptor_type_id);
        call->addIdOperand(descriptor_info_id);
        call->addIdOperand(instruction_id);
        builder.makeReturn(false, fallback_offset_id);
    }

    builder.setBuildPoint(correct_block);
    builder.makeReturn(false, offset_id);
    builder.setBuildPoint(current_build_point);
    return func->getId();
}
}
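As a host-side sketch (not part of the repository), the two SSBO layouts emitted above can be mirrored in C++; the struct names are illustrative, the offsets are exactly the spv::DecorationOffset values written by the two type builders, and mapping the uvec2 failed_shader_hash to a uint64_t assumes a little-endian host:

#include <cstdint>
#include <cstddef>

struct DescriptorHeapQADataHost
{
    uint32_t descriptor_count; // offset 0
    uint32_t heap_index;       // offset 4
    // uvec3 cookies_descriptor_info[]; // offset 8, ArrayStride 12 (cookie, timestamp, type mask)
};

struct DescriptorHeapGlobalQADataHost
{
    uint64_t failed_shader_hash;          // offset 0 (uvec2 in the shader)
    uint32_t failed_offset;               // offset 8
    uint32_t failed_heap;                 // offset 12
    uint32_t failed_cookie;               // offset 16
    uint32_t fault_atomic;                // offset 20
    uint32_t failed_instruction;          // offset 24
    uint32_t failed_descriptor_type_mask; // offset 28
    uint32_t actual_descriptor_type_mask; // offset 32
    uint32_t fault_type;                  // offset 36
    uint32_t va_map_timestamp;            // offset 40
    // uint32_t live_status_table[];      // offset 44, ArrayStride 4 (1 bit per cookie, see build_live_check)
};

static_assert(offsetof(DescriptorHeapGlobalQADataHost, fault_type) == 36, "Must match SPIR-V decorations.");
static_assert(offsetof(DescriptorHeapGlobalQADataHost, va_map_timestamp) == 40, "Must match SPIR-V decorations.");

Note that placing a runtime array of uvec3 at stride 12 relies on scalar block layout, which is also why the sandbox validator below calls SetScalarBlockLayout(true).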

================================================
FILE: descriptor_qa.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include <stdint.h>
#include "spirv.hpp"

namespace dxil_spv
{
static constexpr uint32_t Version = 2;

struct DescriptorQAInfo
{
    uint32_t version = 0;
    uint32_t global_desc_set = 0;
    uint32_t global_binding = 0;
    uint32_t heap_desc_set = 0;
    uint32_t heap_binding = 0;
    uint64_t shader_hash = 0;
};

enum class InstructionInstrumentationType
{
    FullNanInf = 0,
    // Only instrument writes to externally visible memory, etc.
    // Gets rid of potential false positives.
    ExternallyVisibleWriteNanInf = 1,
    // Flushes all NaNs to zero.
    // Useful when trying to figure out where a NaN is first generated.
    FlushNaNToZero = 2,
    // Adds assertions with OpAssumeTrueKHR. For now, these are resolved internally,
    // but we could add a mode that forwards them to the driver.
    ExpectAssume = 3,
    BufferSynchronizationValidation = 4
};

struct InstructionInstrumentationInfo
{
    uint32_t version = 0;
    bool enabled = false;
    bool fp16 = false;
    bool fp32 = false;
    bool fp64 = false;
    uint32_t control_desc_set = 0;
    uint32_t control_binding = 0;
    uint32_t payload_desc_set = 0;
    uint32_t payload_binding = 0;
    uint64_t shader_hash = 0;
    InstructionInstrumentationType type = {};
};

struct InstructionInstrumentationState
{
    uint32_t instruction_count = 0;
    spv::Id nan_inf_instrument_fp16_call_id = 0;
    spv::Id nan_inf_instrument_fp32_call_id = 0;
    spv::Id nan_inf_instrument_fp64_call_id = 0;
    spv::Id assume_true_call_id = 0;
    spv::Id should_report_instrumentation_id = 0;
    spv::Id global_nan_inf_control_var_id = 0;
    spv::Id global_nan_inf_data_var_id = 0;
    InstructionInstrumentationInfo info = {};
};

enum DescriptorQATypeFlagBits
{
    DESCRIPTOR_QA_TYPE_NONE_BIT = 0,
    DESCRIPTOR_QA_TYPE_SAMPLED_IMAGE_BIT = 1 << 0,
    DESCRIPTOR_QA_TYPE_STORAGE_IMAGE_BIT = 1 << 1,
    DESCRIPTOR_QA_TYPE_UNIFORM_BUFFER_BIT = 1 << 2,
    DESCRIPTOR_QA_TYPE_STORAGE_BUFFER_BIT = 1 << 3,
    DESCRIPTOR_QA_TYPE_UNIFORM_TEXEL_BUFFER_BIT = 1 << 4,
    DESCRIPTOR_QA_TYPE_STORAGE_TEXEL_BUFFER_BIT = 1 << 5,
    DESCRIPTOR_QA_TYPE_RT_ACCELERATION_STRUCTURE_BIT = 1 << 6,
    DESCRIPTOR_QA_TYPE_SAMPLER_BIT = 1 << 7,
    DESCRIPTOR_QA_TYPE_RAW_VA_BIT = 1 << 8
};
using DescriptorQATypeFlags = uint32_t;

enum class DescriptorQAGlobalMembers
{
    FailedShaderHash = 0,
    FailedOffset,
    FailedHeap,
    FailedCookie,
    FaultAtomic,
    FailedInstruction,
    FailedDescriptorTypeMask,
    ActualDescriptorTypeMask,
    FaultType,
    VAMapTimestamp,
    LiveStatusTable
};

enum DescriptorQAFaultTypeBits
{
    DESCRIPTOR_QA_FAULT_INDEX_OUT_OF_RANGE_BIT = 1 << 0,
    DESCRIPTOR_QA_FAULT_INVALID_TYPE_BIT = 1 << 1,
    DESCRIPTOR_QA_FAULT_RESOURCE_DESTROYED_BIT = 1 << 2,
    DESCRIPTOR_QA_FAULT_VA_TIMESTAMP_INVALID_BIT = 1 << 3
};

enum class DescriptorQAHeapMembers
{
    DescriptorCount = 0,
    HeapIndex,
    CookiesDescriptorInfo
};

class SPIRVModule;
spv::Id build_descriptor_qa_check_function(SPIRVModule &module);
}
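A usage sketch (not repository code) for the fault bits above: a host-side tool that has read back DescriptorHeapGlobalQAData can decode fault_type like this. The function name is illustrative.

#include "descriptor_qa.hpp"
#include <cstdio>
#include <cstdint>

static void report_descriptor_fault(uint32_t fault_type)
{
    using namespace dxil_spv;
    if (fault_type & DESCRIPTOR_QA_FAULT_INDEX_OUT_OF_RANGE_BIT)
        printf("Descriptor index was out of range of the heap.\n");
    if (fault_type & DESCRIPTOR_QA_FAULT_INVALID_TYPE_BIT)
        printf("Descriptor type mask did not match the access.\n");
    if (fault_type & DESCRIPTOR_QA_FAULT_RESOURCE_DESTROYED_BIT)
        printf("Resource cookie is no longer live.\n");
    if (fault_type & DESCRIPTOR_QA_FAULT_VA_TIMESTAMP_INVALID_BIT)
        printf("View was created after the GPU submission (VA timestamp check).\n");
}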


================================================
FILE: dxbc_spirv_sandbox.cpp
================================================
/* Copyright (c) 2025 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "ir/ir.h"
#include "ir/ir_builder.h"
#include "dxil_converter.hpp"
#include "module.hpp"
#include "api/test_api.h"
#include "context.hpp"
#include "thread_local_allocator.hpp"
#include "cfg_structurizer.hpp"
#include "logging.hpp"
#include "spirv_cross_c.h"
#include "spirv-tools/libspirv.hpp"

using namespace dxil_spv;
using namespace dxbc_spv;

struct Remapper : ResourceRemappingInterface
{
    bool remap_srv(const D3DBinding &d3d_binding, VulkanSRVBinding &vulkan_binding) override
    {
        vulkan_binding = {};
        vulkan_binding.buffer_binding.descriptor_set = d3d_binding.register_space;
        vulkan_binding.buffer_binding.binding = d3d_binding.register_index;
        if (d3d_binding.kind == DXIL::ResourceKind::StructuredBuffer ||
            d3d_binding.kind == DXIL::ResourceKind::RawBuffer)
            vulkan_binding.buffer_binding.descriptor_type = VulkanDescriptorType::SSBO;
        return true;
    }

    bool remap_sampler(const D3DBinding &d3d_binding, VulkanBinding &vulkan_binding) override
    {
        vulkan_binding = {};
        vulkan_binding.descriptor_set = d3d_binding.register_space;
        vulkan_binding.binding = d3d_binding.register_index;
        return true;
    }

    bool remap_uav(const D3DUAVBinding &d3d_binding, VulkanUAVBinding &vulkan_binding) override
    {
        vulkan_binding = {};
        vulkan_binding.buffer_binding.descriptor_set = d3d_binding.binding.register_space;
        vulkan_binding.buffer_binding.binding = d3d_binding.binding.register_index;
        if (d3d_binding.binding.kind == DXIL::ResourceKind::StructuredBuffer ||
            d3d_binding.binding.kind == DXIL::ResourceKind::RawBuffer)
            vulkan_binding.buffer_binding.descriptor_type = VulkanDescriptorType::SSBO;
        if (d3d_binding.counter)
        {
            vulkan_binding.counter_binding.descriptor_set = d3d_binding.binding.register_space;
            vulkan_binding.counter_binding.binding = d3d_binding.binding.register_index;
            vulkan_binding.counter_binding.descriptor_type = VulkanDescriptorType::TexelBuffer;
        }
        return true;
    }

    bool remap_cbv(const D3DBinding &d3d_binding, VulkanCBVBinding &vulkan_binding) override
    {
        vulkan_binding = {};
        vulkan_binding.buffer.descriptor_set = d3d_binding.register_space;
        vulkan_binding.buffer.binding = d3d_binding.register_index;
        return true;
    }

    bool remap_vertex_input(const D3DStageIO &d3d_input, VulkanStageIO &vulkan_location) override
    {
        vulkan_location = {};
        vulkan_location.location = d3d_input.start_row;
        return true;
    }

    bool remap_stream_output(const D3DStreamOutput &, VulkanStreamOutput &vk_output) override
    {
        vk_output = {};
        return true;
    }

    bool remap_stage_input(const D3DStageIO &d3d_input, VulkanStageIO &vk_input) override
    {
        return true;
    }

    bool remap_stage_output(const D3DStageIO &d3d_output, VulkanStageIO &vk_output) override
    {
        return true;
    }

    unsigned get_root_constant_word_count() override
    {
        return 0;
    }

    unsigned get_root_descriptor_count() override
    {
        return 0;
    }

    bool has_nontrivial_stage_input_remapping() override
    {
        return false;
    }
};

static std::string convert_to_asm(const void *code, size_t size)
{
    spvtools::SpirvTools tools(SPV_ENV_VULKAN_1_3);
    tools.SetMessageConsumer([](spv_message_level_t, const char *, const spv_position_t &, const char *message) {
        LOGE("SPIRV-Tools message: %s\n", message);
    });
    std::string str;
    if (!tools.Disassemble(static_cast<const uint32_t *>(code), size / sizeof(uint32_t), &str,
                           SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES | SPV_BINARY_TO_TEXT_OPTION_INDENT |
                           SPV_BINARY_TO_TEXT_OPTION_NESTED_INDENT))
        return "";
    else
        return str;
}

static bool validate_spirv(const void *code, size_t size)
{
    spvtools::SpirvTools tools(SPV_ENV_VULKAN_1_3);
    bool expected_failure = false;
    bool unexpected_failure = false;
    tools.SetMessageConsumer([&](spv_message_level_t, const char *, const spv_position_t &, const char *message) {
        if (strstr(message, "08721") || strstr(message, "08722"))
        {
            LOGW("SPIRV-Tools message expected failure: %s\n", message);
            expected_failure = true;
        }
        else
        {
            LOGE("SPIRV-Tools message: %s\n", message);
            unexpected_failure = true;
        }
    });
    spvtools::ValidatorOptions opts;
    opts.SetScalarBlockLayout(true);
    return tools.Validate(static_cast<const uint32_t *>(code), size / sizeof(uint32_t), opts) ||
           (expected_failure && !unexpected_failure);
}

static std::string convert_to_glsl(const void *code, size_t size)
{
    std::string ret;
    spvc_context context;
    if (spvc_context_create(&context) != SPVC_SUCCESS)
        return ret;

    spvc_parsed_ir ir;
    if (spvc_context_parse_spirv(context, static_cast<const SpvId *>(code), size / sizeof(uint32_t), &ir) != SPVC_SUCCESS)
        goto cleanup;

    spvc_compiler compiler;
    if (spvc_context_create_compiler(context, SPVC_BACKEND_GLSL, ir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP,
                                     &compiler) != SPVC_SUCCESS)
        goto cleanup;

    spvc_compiler_options opts;
    if (spvc_compiler_create_compiler_options(compiler, &opts) != SPVC_SUCCESS)
        goto cleanup;

    spvc_compiler_options_set_bool(opts, SPVC_COMPILER_OPTION_GLSL_ES, SPVC_FALSE);
    spvc_compiler_options_set_uint(opts, SPVC_COMPILER_OPTION_GLSL_VERSION, 460);
    spvc_compiler_options_set_bool(opts, SPVC_COMPILER_OPTION_GLSL_VULKAN_SEMANTICS, SPVC_TRUE);
    spvc_compiler_install_compiler_options(compiler, opts);

    const char *source;
    if (spvc_compiler_compile(compiler, &source) != SPVC_SUCCESS)
        goto cleanup;
    ret = source;

cleanup:
    spvc_context_destroy(context);
    return ret;
}

static Vector<uint32_t> run_test(const char *name, ir::Builder &builder)
{
    LOGI("Testing %s ...\n", name);
    LLVMBCParser parser;
    if (!parser.parseDXBC(builder))
    {
        LOGE("Failed to parse.\n");
        return {};
    }

    SPIRVModule module;
    Converter converter(parser, nullptr, module);
    Remapper remapper;

    OptionSSBOAlignment align;
    align.alignment = 1;
    converter.add_option(align);

    OptionShaderDemoteToHelper demote;
    demote.supported = true;
    converter.add_option(demote);

#if 1
    OptionMinPrecisionNative16Bit native_16bit;
    native_16bit.enabled = true;
    converter.add_option(native_16bit);
#endif

    converter.set_resource_remapping_interface(&remapper);
    auto entry = converter.convert_entry_point();
    if (!entry.entry.entry)
    {
        LOGE("Failed to convert function.\n");
        return {};
    }

    {
        CFGStructurizer structurizer(entry.entry.entry, *entry.node_pool, module);
        if (entry.entry.is_structured)
            structurizer.run_trivial();
        else
            structurizer.run();
        module.emit_entry_point_function_body(structurizer);
    }

    for (auto &leaf : entry.leaf_functions)
    {
        if (!leaf.entry)
        {
            LOGE("Leaf function is nullptr!\n");
            return {};
        }
        CFGStructurizer structurizer(leaf.entry, *entry.node_pool, module);
        module.set_entry_build_point(leaf.func);
        if (leaf.is_structured)
            structurizer.run_trivial();
        else
            structurizer.run();
        module.emit_leaf_function_body(leaf.func, structurizer);
    }

    Vector<uint32_t> spirv;
    if (!module.finalize_spirv(spirv))
    {
        LOGE("Failed to finalize SPIR-V.\n");
        return {};
    }

#if 1
    if (!validate_spirv(spirv.data(), spirv.size() * sizeof(uint32_t)))
    {
        LOGE("Failed to validate SPIR-V.\n");
        return {};
    }
#endif
    return spirv;
}

int main(int argc, char **argv)
{
    auto tests = test_api::enumerateTests(nullptr);
    for (auto &test : tests)
    {
#if 0
        if (test.name != "test_arithmetic_fp32_special")
            continue;
#endif
        begin_thread_allocator_context();
        {
            auto spirv = run_test(test.name.c_str(), test.builder);
            if (spirv.empty())
            {
                LOGE("Failure to convert test to SPIR-V!\n");
                return EXIT_FAILURE;
            }

            auto disasm = convert_to_asm(spirv.data(), spirv.size() * sizeof(uint32_t));
            auto glsl = convert_to_glsl(spirv.data(), spirv.size() * sizeof(uint32_t));

            FILE *file_asm = nullptr;
            FILE *file_glsl = nullptr;
            if (argc == 2)
            {
                std::string path = argv[1];
                path += '/';
                path += test.name;
                auto path_asm = path + ".asm";
                auto path_glsl = path + ".glsl";
                file_asm = fopen(path_asm.c_str(), "w");
                file_glsl = fopen(path_glsl.c_str(), "w");
                if (!file_asm || !file_glsl)
                {
                    LOGE("Failed to open file \"%s\" and \"%s\"\n", path_asm.c_str(), path_glsl.c_str());
                    return EXIT_FAILURE;
                }
            }

            if (file_asm && file_glsl)
            {
                fprintf(file_asm, "SPIR-V:\n%s\n", disasm.c_str());
                fprintf(file_glsl, "GLSL:\n%s\n", glsl.c_str());
                fclose(file_asm);
                fclose(file_glsl);
            }
            else
            {
                LOGI("SPIR-V:\n%s\n", disasm.c_str());
                LOGI("GLSL:\n%s\n", glsl.c_str());
            }
        }
        end_thread_allocator_context();
    }
}
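main() above brackets every test in begin_thread_allocator_context() / end_thread_allocator_context(). As a small hedged sketch (not repository code), that pairing can be wrapped in an RAII guard so early returns cannot leak the thread-local allocator context:

#include "thread_local_allocator.hpp"

// Hypothetical helper; the repository calls the begin/end functions directly.
struct ThreadAllocatorScope
{
    ThreadAllocatorScope() { dxil_spv::begin_thread_allocator_context(); }
    ~ThreadAllocatorScope() { dxil_spv::end_thread_allocator_context(); }
    ThreadAllocatorScope(const ThreadAllocatorScope &) = delete;
    ThreadAllocatorScope &operator=(const ThreadAllocatorScope &) = delete;
};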
rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import sys import os import os.path import subprocess import argparse import tempfile import re import multiprocessing def disasm_shader_regex(input_file, args, regex): f, path = tempfile.mkstemp(suffix = 'dxil') f2, path2 = tempfile.mkstemp(suffix = 'dxil2') os.close(f) os.close(f2) result = None try: dxil_extract_cmd = [args.dxil_extract, input_file, '--output'] p = subprocess.Popen(dxil_extract_cmd + [path, '--verbose'], stdout = subprocess.PIPE) subprocess.check_call(dxil_extract_cmd + [path2, '--reflection'], stdout = subprocess.DEVNULL) llvm_dis_cmd = [args.llvm_dis, '-o', '/dev/stdout'] main_pipe = subprocess.Popen(llvm_dis_cmd + [path], stdout = subprocess.PIPE) refl_pipe = subprocess.Popen(llvm_dis_cmd + [path2], stdout = subprocess.PIPE) lines_main = main_pipe.communicate()[0].decode() lines_refl = refl_pipe.communicate()[0].decode() if args.isolate: allow = re.search(regex, lines_main + lines_refl) else: allow = True if allow: result = p.communicate()[0].decode() result += ' DXIL:\n' for line in lines_main.splitlines(): if re.search(regex, line): result += ' ' + line + '\n' result += ' STAT:\n' for line in lines_refl.splitlines(): if re.search(regex, line): result += ' ' + line + '\n' except: pass os.remove(path) os.remove(path2) return result def disasm_shader_plain(input_file, args, regex): f, path = tempfile.mkstemp(suffix = 'dxil') result = '' try: dxil_extract_cmd = [args.dxil_extract, '--verbose', input_file, '--output', path] if args.reflect: dxil_extract_cmd.append('--reflection') p = subprocess.Popen(dxil_extract_cmd, stdout = subprocess.PIPE) result += p.communicate()[0].decode() llvm_dis_cmd = [args.llvm_dis, '-o', '/dev/stdout', path] p = subprocess.Popen(llvm_dis_cmd, stdout = subprocess.PIPE) result += p.communicate()[0].decode() except: pass os.remove(path) return result def main(): parser = argparse.ArgumentParser(description = 'Script for disassembling DXIL.') parser.add_argument('input', help = 'File or folder containing shader files to convert.') parser.add_argument('--output', help = 'Path where LLVM asm is output.', default = '/dev/stdout') parser.add_argument('--dxil-extract', help = 'Path to dxil-extract', default = 'dxil-extract') parser.add_argument('--llvm-dis', help = 'Path to llvm-dis', default = 'llvm-dis') parser.add_argument('--reflect', action = 'store_true', help = 'Use reflection section') parser.add_argument('--isolate', action = 'store_true', help = 'Isolate regex output to hits only') parser.add_argument('--symbol-regex', type = str, help = 'Grep disassemblies for a symbol') args = parser.parse_args() if not args.input: sys.stderr.write('Need input shader.\n') sys.exit(1) if not args.dxil_extract: sys.stderr.write('Need dxil-extract 
def disasm_shader_plain(input_file, args, regex):
    f, path = tempfile.mkstemp(suffix = 'dxil')
    os.close(f)  # Only the path is needed; close the fd so it does not leak.
    result = ''
    try:
        dxil_extract_cmd = [args.dxil_extract, '--verbose', input_file, '--output', path]
        if args.reflect:
            dxil_extract_cmd.append('--reflection')
        p = subprocess.Popen(dxil_extract_cmd, stdout = subprocess.PIPE)
        result += p.communicate()[0].decode()
        llvm_dis_cmd = [args.llvm_dis, '-o', '/dev/stdout', path]
        p = subprocess.Popen(llvm_dis_cmd, stdout = subprocess.PIPE)
        result += p.communicate()[0].decode()
    except Exception:
        pass
    os.remove(path)
    return result

def main():
    parser = argparse.ArgumentParser(description = 'Script for disassembling DXIL.')
    parser.add_argument('input', help = 'File or folder containing shader files to convert.')
    parser.add_argument('--output', help = 'Path where LLVM asm is output.', default = '/dev/stdout')
    parser.add_argument('--dxil-extract', help = 'Path to dxil-extract', default = 'dxil-extract')
    parser.add_argument('--llvm-dis', help = 'Path to llvm-dis', default = 'llvm-dis')
    parser.add_argument('--reflect', action = 'store_true', help = 'Use reflection section')
    parser.add_argument('--isolate', action = 'store_true', help = 'Isolate regex output to hits only')
    parser.add_argument('--symbol-regex', type = str, help = 'Grep disassemblies for a symbol')
    args = parser.parse_args()

    if not args.input:
        sys.stderr.write('Need input shader.\n')
        sys.exit(1)
    if not args.dxil_extract:
        sys.stderr.write('Need dxil-extract path.\n')
        sys.exit(1)

    if args.symbol_regex:
        regex = re.compile(args.symbol_regex)
    else:
        regex = None

    if os.path.isfile(args.input):
        files = [args.input]
    else:
        files = []
        for file in os.scandir(args.input):
            if os.path.splitext(file.name)[1] == '.dxil':
                files.append(os.path.join(args.input, file.name))

    pool = multiprocessing.Pool(multiprocessing.cpu_count())
    results = []
    counter = 0
    with open(args.output, 'w') as f:
        for input_file in files:
            results.append(pool.apply_async(disasm_shader_regex if regex else disasm_shader_plain,
                                            args = (input_file, args, regex)))
        for res in results:
            lines = res.get()
            counter += 1
            print('Progress {} / {}'.format(counter, len(files)))
            if lines is not None:
                f.writelines(lines)
                f.writelines('\n\n')

if __name__ == '__main__':
    main()

================================================
FILE: dxil.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include "thread_local_allocator.hpp"
#include <stdint.h> // Fixed-width types used throughout (the original include targets were lost in extraction).
#include <stddef.h>

namespace DXIL
{
constexpr size_t ContainerHashSize = 16;

struct ContainerHeader
{
    uint32_t header_fourcc;
    uint8_t digest[ContainerHashSize];
    uint16_t major_version;
    uint16_t minor_version;
    uint32_t container_size_in_bytes;
    uint32_t part_count;
};

struct PartHeader
{
    uint32_t part_fourcc;
    uint32_t part_size;
};

struct ProgramHeader
{
    uint32_t program_version;
    uint32_t size_in_uint32;
    uint32_t dxil_magic;
    uint32_t dxil_version;
    uint32_t bitcode_offset;
    uint32_t bitcode_size;
};

struct IOElement
{
    dxil_spv::String semantic_name;
    uint32_t stream_index;
    uint32_t semantic_index;
    uint32_t system_value_semantic;
    uint32_t component_type;
    uint32_t register_index;
    uint32_t mask;
    uint32_t min_precision;
};

constexpr uint32_t fourcc(uint32_t a, uint32_t b, uint32_t c, uint32_t d)
{
    return a | (b << 8) | (c << 16) | (d << 24);
}

enum class FourCC : uint32_t
{
    Container = fourcc('D', 'X', 'B', 'C'),
    ResourceDef = fourcc('R', 'D', 'E', 'F'),
    InputSignature = fourcc('I', 'S', 'G', '1'),
    OutputSignature = fourcc('O', 'S', 'G', '1'),
    PatchConstantSignature = fourcc('P', 'S', 'G', '1'),
    ShaderStatistics = fourcc('S', 'T', 'A', 'T'),
    ShaderDebugInfoDXIL = fourcc('I', 'L', 'D', 'B'),
    ShaderDebugName = fourcc('I', 'L', 'D', 'N'),
    FeatureInfo = fourcc('S', 'F', 'I', '0'),
    PrivateData = fourcc('P', 'R', 'I', 'V'),
    RootSignature = fourcc('R', 'T', 'S', '0'),
    DXIL = fourcc('D', 'X', 'I', 'L'),
    SHDR = fourcc('S', 'H', 'D', 'R'),
    SHEX = fourcc('S', 'H', 'E', 'X'),
    PipelineStateValidation = fourcc('P', 'S', 'V', '0'),
    RuntimeData = fourcc('R', 'D', 'A', 'T'),
    ShaderHash = fourcc('H', 'A', 'S', 'H')
};

enum class RuntimeDataPartType : uint32_t
{
    Invalid = 0,
    StringBuffer = 1,
    IndexArrays = 2,
    ResourceTable = 3,
    FunctionTable = 4,
    RawBytes = 5,
    SubobjectTable = 6
};

enum class SubobjectKind : uint32_t
{
    StateObjectConfig = 0,
    GlobalRootSignature = 1,
    LocalRootSignature = 2,
    SubobjectToExportsAssociation = 8,
    RaytracingShaderConfig = 9,
    RaytracingPipelineConfig = 10,
    HitGroup = 11,
    RaytracingPipelineConfig1 = 12
};

enum class HitGroupType : uint32_t
{
    Triangle = 0,
    Procedural = 1
};

enum class ComponentType : uint8_t
{
    Invalid = 0,
    I1, I16, U16, I32, U32, I64, U64,
    F16, F32, F64,
    SNormF16, UNormF16, SNormF32, UNormF32, SNormF64, UNormF64,
    InternalU8 = 0xff // Doesn't exist, but dummy value to signal 8-bit SSBO
};

enum class InterpolationMode : uint8_t
{
    Undefined,
    Constant,
    Linear,
    LinearCentroid,
    LinearNoperspective,
    LinearNoperspectiveCentroid,
    LinearSample,
    LinearNoperspectiveSample,
    Invalid
};

enum class Semantic : uint8_t
{
    User, VertexID, InstanceID, Position, RenderTargetArrayIndex, ViewPortArrayIndex,
    ClipDistance, CullDistance, OutputControlPointID, DomainLocation, PrimitiveID,
    GSInstanceID, SampleIndex, IsFrontFace, Coverage, InnerCoverage, Target, Depth,
    DepthLessEqual, DepthGreaterEqual, StencilRef, DispatchThreadID, GroupID, GroupIndex,
    GroupThreadID, TessFactor, InsideTessFactor, ViewID, Barycentrics, ShadingRate,
    CullPrimitive,
    // Fake semantics to disambiguate semantics based on interpolation flags.
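    // (255 sits well outside the range of real DXIL semantics, so the internal
    // no-perspective barycentrics variant can never collide with a real one.)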
    InternalBarycentricsNoPerspective = 255
};

enum class ResourceType : uint8_t
{
    SRV = 0,
    UAV = 1,
    CBV = 2,
    Sampler = 3
};

enum class ResourceKind : uint8_t
{
    Invalid = 0,
    Texture1D, Texture2D, Texture2DMS, Texture3D, TextureCube,
    Texture1DArray, Texture2DArray, Texture2DMSArray, TextureCubeArray,
    TypedBuffer, RawBuffer, StructuredBuffer, CBuffer, Sampler, TBuffer,
    RTAccelerationStructure, FeedbackTexture2D, FeedbackTexture2DArray
};

enum class Op : unsigned
{
    // Input output
    TempRegLoad = 0, TempRegStore = 1, MinPrecXRegLoad = 2, MinPrecXRegStore = 3,
    LoadInput = 4, StoreOutput = 5,
    FAbs = 6, Saturate = 7, IsNan = 8, IsInf = 9, IsFinite = 10, IsNormal = 11,
    Cos = 12, Sin = 13, Tan = 14, Acos = 15, Asin = 16, Atan = 17,
    Hcos = 18, Hsin = 19, Htan = 20,
    Exp = 21, Frc = 22, Log = 23, Sqrt = 24, Rsqrt = 25,
    Round_ne = 26, Round_ni = 27, Round_pi = 28, Round_z = 29,
    Bfrev = 30, Countbits = 31, FirstbitLo = 32, FirstbitHi = 33, FirstbitSHi = 34,
    FMax = 35, FMin = 36, IMax = 37, IMin = 38, UMax = 39, UMin = 40,
    IMul = 41, UMul = 42, UDiv = 43, UAddc = 44, USubb = 45,
    FMad = 46, Fma = 47, IMad = 48, UMad = 49, Msad = 50,
    Ibfe = 51, Ubfe = 52, Bfi = 53,
    Dot2 = 54, Dot3 = 55, Dot4 = 56,
    CreateHandle = 57, CBufferLoad = 58, CBufferLoadLegacy = 59,
    Sample = 60, SampleBias = 61, SampleLevel = 62, SampleGrad = 63,
    SampleCmp = 64, SampleCmpLevelZero = 65,
    TextureLoad = 66, TextureStore = 67,
    BufferLoad = 68, BufferStore = 69, BufferUpdateCounter = 70,
    CheckAccessFullyMapped = 71, GetDimensions = 72,
    TextureGather = 73, TextureGatherCmp = 74,
    Texture2DMSGetSamplePosition = 75, RenderTargetGetSamplePosition = 76, RenderTargetGetSampleCount = 77,
    AtomicBinOp = 78, AtomicCompareExchange = 79, Barrier = 80,
    CalculateLOD = 81, Discard = 82,
    DerivCoarseX = 83, DerivCoarseY = 84, DerivFineX = 85, DerivFineY = 86,
    EvalSnapped = 87, EvalSampleIndex = 88, EvalCentroid = 89,
    SampleIndex = 90, Coverage = 91, InnerCoverage = 92,
    ThreadId = 93, GroupId = 94, ThreadIdInGroup = 95, FlattenedThreadIdInGroup = 96,
    EmitStream = 97, CutStream = 98, EmitThenCutStream = 99, GSInstanceID = 100,
    MakeDouble = 101, SplitDouble = 102,
    LoadOutputControlPoint = 103, LoadPatchConstant = 104, DomainLocation = 105,
    StorePatchConstant = 106, OutputControlPointID = 107, PrimitiveID = 108,
    CycleCounterLegacy = 109,
    WaveIsFirstLane = 110, WaveGetLaneIndex = 111, WaveGetLaneCount = 112,
    WaveAnyTrue = 113, WaveAllTrue = 114, WaveActiveAllEqual = 115, WaveActiveBallot = 116,
    WaveReadLaneAt = 117, WaveReadLaneFirst = 118,
    WaveActiveOp = 119, WaveActiveBit = 120, WavePrefixOp = 121,
    QuadReadLaneAt = 122, QuadOp = 123,
    BitcastI16toF16 = 124, BitcastF16toI16 = 125, BitcastI32toF32 = 126, BitcastF32toI32 = 127,
    BitcastI64toF64 = 128, BitcastF64toI64 = 129,
    LegacyF32ToF16 = 130, LegacyF16ToF32 = 131,
    LegacyDoubleToFloat = 132, LegacyDoubleToSInt32 = 133, LegacyDoubleToUInt32 = 134,
    WaveAllBitCount = 135, WavePrefixBitCount = 136,
    AttributeAtVertex = 137, ViewID = 138,
    RawBufferLoad = 139, RawBufferStore = 140,
    InstanceID = 141, InstanceIndex = 142, HitKind = 143, RayFlags = 144,
    DispatchRaysIndex = 145, DispatchRaysDimensions = 146,
    WorldRayOrigin = 147, WorldRayDirection = 148,
    ObjectRayOrigin = 149, ObjectRayDirection = 150,
    ObjectToWorld = 151, WorldToObject = 152,
    RayTMin = 153, RayTCurrent = 154,
    IgnoreHit = 155, AcceptHitAndEndSearch = 156,
    TraceRay = 157, ReportHit = 158, CallShader = 159,
    CreateHandleForLib = 160, PrimitiveIndex = 161,
    Dot2AddHalf = 162, Dot4AddI8Packed = 163, Dot4AddU8Packed = 164,
    WaveMatch = 165, WaveMultiPrefixOp = 166,
    WaveMultiPrefixBitCount = 167,
    SetMeshOutputCounts = 168, EmitIndices = 169, GetMeshPayload = 170,
    StoreVertexOutput = 171, StorePrimitiveOutput = 172, DispatchMesh = 173,
    WriteSamplerFeedback = 174, WriteSamplerFeedbackBias = 175,
    WriteSamplerFeedbackLevel = 176, WriteSamplerFeedbackGrad = 177,
    AllocateRayQuery = 178, RayQuery_TraceRayInline = 179, RayQuery_Proceed = 180, RayQuery_Abort = 181,
    RayQuery_CommitNonOpaqueTriangleHit = 182, RayQuery_CommitProceduralPrimitiveHit = 183,
    RayQuery_CommittedStatus = 184, RayQuery_CandidateType = 185,
    RayQuery_CandidateObjectToWorld3x4 = 186, RayQuery_CandidateWorldToObject3x4 = 187,
    RayQuery_CommittedObjectToWorld3x4 = 188, RayQuery_CommittedWorldToObject3x4 = 189,
    RayQuery_CandidateProceduralPrimitiveNonOpaque = 190,
    RayQuery_CandidateTriangleFrontFace = 191, RayQuery_CommittedTriangleFrontFace = 192,
    RayQuery_CandidateTriangleBarycentrics = 193, RayQuery_CommittedTriangleBarycentrics = 194,
    RayQuery_RayFlags = 195, RayQuery_WorldRayOrigin = 196, RayQuery_WorldRayDirection = 197,
    RayQuery_RayTMin = 198, RayQuery_CandidateTriangleRayT = 199, RayQuery_CommittedRayT = 200,
    RayQuery_CandidateInstanceIndex = 201, RayQuery_CandidateInstanceID = 202,
    RayQuery_CandidateGeometryIndex = 203, RayQuery_CandidatePrimitiveIndex = 204,
    RayQuery_CandidateObjectRayOrigin = 205, RayQuery_CandidateObjectRayDirection = 206,
    RayQuery_CommittedInstanceIndex = 207, RayQuery_CommittedInstanceID = 208,
    RayQuery_CommittedGeometryIndex = 209, RayQuery_CommittedPrimitiveIndex = 210,
    RayQuery_CommittedObjectRayOrigin = 211, RayQuery_CommittedObjectRayDirection = 212,
    RayQuery_GeometryIndex = 213,
    RayQuery_CandidateInstanceContributionToHitGroupIndex = 214,
    RayQuery_CommittedInstanceContributionToHitGroupIndex = 215,
    AnnotateHandle = 216, CreateHandleFromBinding = 217, CreateHandleFromHeap = 218,
    Unpack4x8 = 219, Pack4x8 = 220, IsHelperLane = 221, QuadVote = 222,
    TextureGatherRaw = 223, SampleCmpLevel = 224, TextureStoreSample = 225,
    WaveMatrix_Annotate = 226, WaveMatrix_Depth = 227, WaveMatrix_Fill = 228,
    WaveMatrix_LoadRawBuf = 229, WaveMatrix_LoadGroupShared = 230,
    WaveMatrix_StoreRawBuf = 231, WaveMatrix_StoreGroupShared = 232,
    WaveMatrix_Multiply = 233, WaveMatrix_MultiplyAccumulate = 234,
    WaveMatrix_ScalarOp = 235, WaveMatrix_SumAccumulate = 236, WaveMatrix_Add = 237,
    AllocateNodeOutputRecords = 238, GetNodeRecordPtr = 239,
    IncrementOutputCount = 240, OutputComplete = 241, GetInputRecordCount = 242,
    FinishedCrossGroupSharing = 243,
    BarrierByMemoryType = 244, BarrierByMemoryHandle = 245, BarrierByNodeRecordHandle = 246,
    CreateNodeOutputHandle = 247, IndexNodeHandle = 248, AnnotateNodeHandle = 249,
    CreateNodeInputRecordHandle = 250, AnnotateNodeRecordHandle = 251,
    NodeOutputIsValid = 252, GetRemainingRecursionLevels = 253,
    SampleCmpGrad = 254, SampleCmpBias = 255,
    StartVertexLocation = 256, StartInstanceLocation = 257,

    // Internal extensions for where there is impedance mismatch
    ExtendedDeriv, ExtendedCalculateLOD, ExtendedGetDimensions,
    ExtendedFClamp, ExtendedIClamp, ExtendedUClamp,
    ExtendedLegacyF32ToF16, ExtendedLegacyF16ToF32, ExtendedIAbs, ExtendedEvalSnapped,
    ExtendedSpirvIbfe, ExtendedSpirvUbfe, ExtendedSpirvBfi,
    ExtendedSpirvFindLSB, ExtendedSpirvIFindMSB, ExtendedSpirvUFindMSB,
    ExtendedSpirvIAddCarry, ExtendedSpirvISubBorrow,
    ExtendedSpirvSMulExtended, ExtendedSpirvUMulExtended,
    ExtendedSpirvLoadInput, ExtendedSpirvControlPointCountIn, ExtendedPow,
    Count
};

enum class AtomicBinOp : uint8_t
{
    IAdd = 0, And = 1, Or = 2, Xor = 3, IMin = 4, IMax = 5, UMin = 6,
    UMax = 7, Exchange = 8,

    // Internal extensions for custom IR
    // Load = optimized or/add of 0
    // Store = optimized exchange
    Sub = 200, Load, Store,
    Invalid = 255
};

enum class ShaderPropertyTag : uint8_t
{
    ShaderFlags = 0, GSState = 1, DSState = 2, HSState = 3, NumThreads = 4,
    AutoBindingSpace = 5, RayPayloadSize = 6, RayAttribSizeTag = 7, ShaderKind = 8,
    MSState = 9, ASState = 10, WaveSize = 11,
    NodeLaunchType = 13, NodeIsProgramEntry = 14, NodeID = 15,
    NodeLocalRootArgumentsTableIndex = 16, NodeShareInputOf = 17, NodeDispatchGrid = 18,
    NodeMaxRecursionDepth = 19, NodeInputs = 20, NodeOutputs = 21,
    NodeMaxDispatchGrid = 22, RangedWaveSize = 23
};

enum class GSStageOutTags : uint32_t
{
    Stream = 0,
    Invalid
};

enum BarrierModeBits : uint8_t
{
    SyncThreadGroup = 1 << 0,
    AccessUAVGlobal = 1 << 1,
    AccessUAVThreadGroup = 1 << 2,
    AccessGroupShared = 1 << 3
};

enum class AddressSpace : uint8_t
{
    Thread = 0,
    GroupShared = 3,
    PhysicalNodeIO = 6,
    Invalid
};

enum class InputPrimitive : uint8_t
{
    Undefined = 0,
    Point = 1,
    Line = 2,
    Triangle = 3,
    LineWithAdjacency = 6,
    TriangleWithAdjaceny = 7
};

enum class PrimitiveTopology : uint8_t
{
    Undefined = 0,
    PointList = 1,
    LineList = 2,
    LineStrip = 3,
    TriangleList = 4,
    TriangleStrip = 5
};

enum class TessellatorDomain : uint8_t
{
    Undefined = 0,
    IsoLine = 1,
    Tri = 2,
    Quad = 3
};

enum class TessellatorOutputPrimitive : uint8_t
{
    Undefined = 0,
    Point = 1,
    Line = 2,
    TriangleCW = 3,
    TriangleCCW = 4
};

enum class TessellatorPartitioning : uint8_t
{
    Undefined = 0,
    Integer = 1,
    Pow2 = 2,
    FractionalOdd = 3,
    FractionalEven = 4
};

enum class MeshOutputTopology
{
    Undefined = 0,
    Line = 1,
    Triangle = 2
};

enum ShaderFlag
{
    ShaderFlagEarlyDepthStencil = 1 << 3,
    ShaderFlagNativeLowPrecision = 1 << 23
};

enum class WaveOpKind : uint8_t
{
    Sum = 0,
    Product = 1,
    Min = 2,
    Max = 3
};

enum class WaveBitOpKind : uint8_t
{
    And = 0,
    Or = 1,
    Xor = 2
};

enum class WaveMultiPrefixOpKind : uint8_t
{
    Sum = 0,
    And = 1,
    Or = 2,
    Xor = 3,
    Product = 4
};

enum class SignedOpKind : uint8_t
{
    Signed = 0,
    Unsigned = 1
};

enum class ShaderKind
{
    Pixel = 0, Vertex, Geometry, Hull, Domain, Compute, Library,
    RayGeneration, Intersection, AnyHit, ClosestHit, Miss, Callable,
    Mesh, Amplification, Node,
    Invalid
};

enum class NodeLaunchType
{
    Invalid = 0,
    Broadcasting = 1,
    Coalescing = 2,
    Thread = 3
};

enum class NodeMetadataTag
{
    NodeOutputID = 0,
    NodeIOFlags = 1,
    NodeRecordType = 2,
    NodeMaxRecords = 3,
    NodeMaxRecordsSharedWith = 4,
    NodeOutputArraySize = 5,
    NodeAllowSparseNodes = 6
};

enum NodeIOFlagBits
{
    NodeIOInputBit = 0x1,
    NodeIOOutputBit = 0x2,
    NodeIOReadWriteBit = 0x4,
    NodeIOEmptyRecordBit = 0x8,
    NodeIONodeArrayBit = 0x10,
    NodeIOThreadRecordBit = 0x20,
    NodeIOGroupRecordBit = 0x40,
    NodeIODispatchRecordBit = 0x60,
    RecordGranularityMask = 0x60,
    NodeIOKindMask = 0x7f,
    NodeIOTrackRWInputSharingBit = 0x100,
    NodeIOGloballyCoherentBit = 0x200,
    NodeFlagsMask = 0x100,
    RecordFlagsMask = 0x200
};

enum class NodeIOKind
{
    Invalid = 0,
    EmptyInput = NodeIOEmptyRecordBit | NodeIOInputBit,
    NodeOutput = NodeIOReadWriteBit | NodeIOOutputBit,
    NodeOutputArray = NodeIOReadWriteBit | NodeIOOutputBit | NodeIONodeArrayBit,
    EmptyOutput = NodeIOEmptyRecordBit | NodeIOOutputBit,
    EmptyOutputArray = NodeIOEmptyRecordBit | NodeIOOutputBit | NodeIONodeArrayBit,

    // Records:
    DispatchNodeInputRecord = NodeIOInputBit | NodeIODispatchRecordBit,
    GroupNodeInputRecords = NodeIOInputBit | NodeIOGroupRecordBit,
    ThreadNodeInputRecord = NodeIOInputBit | NodeIOThreadRecordBit,
    RWDispatchNodeInputRecord = NodeIOReadWriteBit | NodeIOInputBit | NodeIODispatchRecordBit,
    RWGroupNodeInputRecords = NodeIOReadWriteBit | NodeIOInputBit | NodeIOGroupRecordBit,
    RWThreadNodeInputRecord = NodeIOReadWriteBit | NodeIOInputBit | NodeIOThreadRecordBit,
    GroupNodeOutputRecords = NodeIOReadWriteBit | NodeIOOutputBit | NodeIOGroupRecordBit,
    ThreadNodeOutputRecords = NodeIOReadWriteBit | NodeIOOutputBit | NodeIOThreadRecordBit
};

enum MemoryTypeFlagBits
{
    MemoryTypeUavBit = 0x1,
    MemoryTypeGroupSharedBit = 0x2,
    MemoryTypeNodeInputBit = 0x4,
    MemoryTypeNodeOutputBit = 0x8,
    MemoryTypeAllBits = 0xf
};

enum BarrierSemanticsFlagBits
{
    GroupSyncBit = 0x1,
    GroupScopeBit = 0x2,
    DeviceScopeBit = 0x4
};
} // namespace DXIL

================================================
FILE: dxil_converter.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "opcodes/converter_impl.hpp"
#include "opcodes/opcodes_dxil_builtins.hpp"
#include "opcodes/opcodes_llvm_builtins.hpp"
#include "opcodes/dxil/dxil_common.hpp"
#include "opcodes/dxil/dxil_workgraph.hpp"
#include "opcodes/dxil/dxil_geometry.hpp"
#include "dxil_converter.hpp"
#include "logging.hpp"
#include "node.hpp"
#include "node_pool.hpp"
#include "spirv_module.hpp"
#include <algorithm> // Assumed targets; the two system include names were lost in extraction.
#include <memory>    // std::find_if / std::make_unique usage below needs these.

namespace dxil_spv
{
Converter::Converter(LLVMBCParser &bitcode_parser_, LLVMBCParser *bitcode_reflection_parser_, SPIRVModule &module_)
{
    impl = std::make_unique<Impl>(bitcode_parser_, bitcode_reflection_parser_, module_);
}

Converter::~Converter()
{
}

void Converter::add_local_root_constants(uint32_t register_space, uint32_t register_index, uint32_t num_words)
{
    LocalRootSignatureEntry entry = {};
    entry.type = LocalRootSignatureType::Constants;
    entry.constants.num_words = num_words;
    entry.constants.register_space = register_space;
    entry.constants.register_index = register_index;
    impl->local_root_signature.push_back(entry);
}

void Converter::add_local_root_descriptor(ResourceClass type, uint32_t register_space, uint32_t register_index)
{
    LocalRootSignatureEntry entry = {};
    entry.type = LocalRootSignatureType::Descriptor;
    entry.descriptor.type = type;
    entry.descriptor.register_space = register_space;
    entry.descriptor.register_index = register_index;
    impl->local_root_signature.push_back(entry);
}

void Converter::add_local_root_descriptor_table(Vector<DescriptorTableEntry> entries)
{
    LocalRootSignatureEntry entry = {};
    entry.type = LocalRootSignatureType::Table;
    entry.table_entries = std::move(entries);
    impl->local_root_signature.push_back(std::move(entry));
}

void Converter::add_local_root_descriptor_table(const DescriptorTableEntry *entries, size_t count)
{
    add_local_root_descriptor_table({ entries, entries + count });
}

uint32_t Converter::get_patch_location_offset() const
{
    return impl->patch_location_offset;
}

void Converter::set_patch_location_offset(uint32_t offset)
{
    impl->patch_location_offset = offset;
}

void Converter::get_workgroup_dimensions(uint32_t &x, uint32_t &y, uint32_t &z) const
{
    x = impl->execution_mode_meta.workgroup_threads[0];
    y = impl->execution_mode_meta.workgroup_threads[1];
    z = impl->execution_mode_meta.workgroup_threads[2];
}

uint32_t Converter::get_patch_vertex_count() const
{
    return impl->execution_mode_meta.stage_input_num_vertex;
}

void Converter::get_compute_wave_size_range(uint32_t &min, uint32_t &max, uint32_t &preferred) const
{
    min = impl->execution_mode_meta.wave_size_min;
    max = impl->execution_mode_meta.wave_size_max;
    preferred = impl->execution_mode_meta.wave_size_preferred;
}

uint32_t Converter::get_compute_heuristic_max_wave_size() const
{
    if (impl->execution_mode_meta.wave_size_min)
        return 0;
    return impl->execution_mode_meta.heuristic_max_wave_size;
}

uint32_t Converter::get_compute_heuristic_min_wave_size() const
{
    if (impl->execution_mode_meta.wave_size_min)
        return 0;
    return impl->execution_mode_meta.heuristic_min_wave_size;
}

bool Converter::is_multiview_compatible() const
{
    // We're not multiview compatible if ViewIndex does not correspond 1:1 with output layer index.
    // ViewIndex is limited, and if the constant Layer offset is too large, it may force "slow" path
    // with draw-level instancing.
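    // Concretely (see the return expression below): multiview must be enabled,
    // the shader must not compute its own layer index, and a valid spec constant
    // mapping ViewIndex to the view instance must have been provided.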
    return impl->options.multiview.enable && !impl->multiview.custom_layer_index &&
           impl->options.multiview.view_index_to_view_instance_spec_id != UINT32_MAX;
}

bool Converter::shader_requires_feature(ShaderFeature feature) const
{
    switch (feature)
    {
    case ShaderFeature::Native16BitOperations:
        return impl->builder().hasCapability(spv::CapabilityFloat16) ||
               impl->builder().hasCapability(spv::CapabilityInt16);
    default:
        return false;
    }
}

bool Converter::get_driver_version(uint32_t &driver_id, uint32_t &driver_version) const
{
    if (impl->options.driver_version == 0)
        return false;
    driver_id = impl->options.driver_id;
    driver_version = impl->options.driver_version;
    return true;
}

ConvertedFunction Converter::convert_entry_point()
{
    return impl->convert_entry_point();
}

template <typename T>
static T get_constant_metadata(const llvm::MDNode *node, unsigned index)
{
    return T(llvm::cast<llvm::ConstantAsMetadata>(node->getOperand(index))->getValue()->getUniqueInteger().getSExtValue());
}

static String get_string_metadata(const llvm::MDNode *node, unsigned index)
{
#ifdef HAVE_LLVMBC
    return llvm::cast<llvm::MDString>(node->getOperand(index))->getString();
#else
    std::string tmp = llvm::cast<llvm::MDString>(node->getOperand(index))->getString();
    String str(tmp.begin(), tmp.end());
    return str;
#endif
}

static String get_resource_name_metadata(const llvm::MDNode *node, const llvm::MDNode *reflections)
{
    if (reflections)
    {
        unsigned bind_space = get_constant_metadata<unsigned>(node, 3);
        unsigned bind_register = get_constant_metadata<unsigned>(node, 4);
        unsigned num_operands = reflections->getNumOperands();
        for (unsigned i = 0; i < num_operands; i++)
        {
            auto *refl_node = llvm::cast<llvm::MDNode>(reflections->getOperand(i));
            if (get_constant_metadata<unsigned>(refl_node, 3) == bind_space &&
                get_constant_metadata<unsigned>(refl_node, 4) == bind_register)
            {
                return get_string_metadata(refl_node, 2);
            }
        }
    }
    return get_string_metadata(node, 2);
}

static spv::Dim image_dimension_from_resource_kind(DXIL::ResourceKind kind)
{
    switch (kind)
    {
    case DXIL::ResourceKind::Texture1D:
    case DXIL::ResourceKind::Texture1DArray:
        return spv::Dim1D;
    case DXIL::ResourceKind::Texture2D:
    case DXIL::ResourceKind::Texture2DMS:
    case DXIL::ResourceKind::Texture2DArray:
    case DXIL::ResourceKind::Texture2DMSArray:
    case DXIL::ResourceKind::FeedbackTexture2D:
    case DXIL::ResourceKind::FeedbackTexture2DArray:
        return spv::Dim2D;
    case DXIL::ResourceKind::Texture3D:
        return spv::Dim3D;
    case DXIL::ResourceKind::TextureCube:
    case DXIL::ResourceKind::TextureCubeArray:
        return spv::DimCube;
    case DXIL::ResourceKind::TypedBuffer:
    case DXIL::ResourceKind::StructuredBuffer:
    case DXIL::ResourceKind::RawBuffer:
        return spv::DimBuffer;
    default:
        return spv::DimMax;
    }
}

static bool image_dimension_is_arrayed(DXIL::ResourceKind kind)
{
    switch (kind)
    {
    case DXIL::ResourceKind::Texture1DArray:
    case DXIL::ResourceKind::Texture2DArray:
    case DXIL::ResourceKind::Texture2DMSArray:
    case DXIL::ResourceKind::TextureCubeArray:
    case DXIL::ResourceKind::FeedbackTexture2DArray:
        return true;
    default:
        return false;
    }
}

static bool image_dimension_is_multisampled(DXIL::ResourceKind kind)
{
    switch (kind)
    {
    case DXIL::ResourceKind::Texture2DMS:
    case DXIL::ResourceKind::Texture2DMSArray:
        return true;
    default:
        return false;
    }
}

static DXIL::ComponentType convert_16bit_component_to_32bit(DXIL::ComponentType type)
{
    switch (type)
    {
    case DXIL::ComponentType::F16:
        return DXIL::ComponentType::F32;
    case DXIL::ComponentType::I16:
        return DXIL::ComponentType::I32;
    case DXIL::ComponentType::U16:
        return DXIL::ComponentType::U32;
    default:
        return type;
    }
}
static DXIL::ComponentType convert_component_to_unsigned(DXIL::ComponentType type)
{
    switch (type)
    {
    case DXIL::ComponentType::I16:
        return DXIL::ComponentType::U16;
    case DXIL::ComponentType::I32:
        return DXIL::ComponentType::U32;
    case DXIL::ComponentType::I64:
        return DXIL::ComponentType::U64;
    default:
        return type;
    }
}

static DXIL::ComponentType normalize_component_type(DXIL::ComponentType type)
{
    switch (type)
    {
    case DXIL::ComponentType::UNormF16:
    case DXIL::ComponentType::SNormF16:
        return DXIL::ComponentType::F16;
    case DXIL::ComponentType::UNormF32:
    case DXIL::ComponentType::SNormF32:
        return DXIL::ComponentType::F32;
    case DXIL::ComponentType::UNormF64:
    case DXIL::ComponentType::SNormF64:
        return DXIL::ComponentType::F64;
    default:
        return type;
    }
}

static spv::Id build_ssbo_runtime_array_type(Converter::Impl &impl, RawType type, unsigned bits, unsigned vecsize,
                                             unsigned range_size, const String &name)
{
    auto &builder = impl.builder();
    spv::Id value_type = type == RawType::Integer ? builder.makeUintType(bits) : builder.makeFloatType(bits);
    if (vecsize > 1)
        value_type = builder.makeVectorType(value_type, vecsize);

    spv::Id element_array_type = builder.makeRuntimeArray(value_type);
    builder.addDecoration(element_array_type, spv::DecorationArrayStride, vecsize * (bits / 8));

    spv::Id block_type_id = impl.get_struct_type({ element_array_type }, 0, name.c_str());
    builder.addMemberDecoration(block_type_id, 0, spv::DecorationOffset, 0);
    builder.addDecoration(block_type_id, spv::DecorationBlock);

    spv::Id type_id = block_type_id;
    if (range_size != 1)
    {
        assert(range_size != 0);
        if (range_size == ~0u)
            type_id = builder.makeRuntimeArray(type_id);
        else
            type_id = builder.makeArrayType(type_id, builder.makeUintConstant(range_size), 0);
    }
    return type_id;
}

Vector<RawDeclarationVariable> Converter::Impl::create_bindless_heap_variable_alias_group(
    const BindlessInfo &base_info, const Vector<RawDeclaration> &raw_decls)
{
    Vector<RawDeclarationVariable> decls;
    decls.reserve(raw_decls.size());

    for (auto &decl : raw_decls)
    {
        RawDeclarationVariable var = {};
        var.declaration = decl;
        auto info = base_info;
        info.component = raw_width_to_component_type(decl.type, decl.width);
        info.raw_vecsize = decl.vecsize;
        var.var_id = create_bindless_heap_variable(info);
        decls.push_back(var);
    }

    return decls;
}

spv::Id Converter::Impl::create_ubo_variable(const RawDeclaration &raw_decl, uint32_t range_size,
                                             const String &name, unsigned cbv_size)
{
    auto &builder = spirv_module.get_builder();

    unsigned element_size = raw_width_to_bits(raw_decl.width) * raw_vecsize_to_vecsize(raw_decl.vecsize) / 8;
    unsigned array_length = (cbv_size + element_size - 1) / element_size;

    // It seems like we will have to bitcast ourselves away from vec4 here after loading.
    spv::Id size_id = builder.makeUintConstant(array_length, false);

    unsigned bits = raw_width_to_bits(raw_decl.width);
    spv::Id element_type = raw_decl.type == RawType::Float ? builder.makeFloatType(bits) : builder.makeUintType(bits);
    if (raw_decl.vecsize != RawVecSize::V1)
        element_type = builder.makeVectorType(element_type, raw_vecsize_to_vecsize(raw_decl.vecsize));

    spv::Id member_array_type = builder.makeArrayType(element_type, size_id, element_size);
    builder.addDecoration(member_array_type, spv::DecorationArrayStride, element_size);
"" : (name + "UBO"); spv::Id type_id = get_struct_type({ member_array_type }, 0, ubo_block_name.c_str()); builder.addMemberDecoration(type_id, 0, spv::DecorationOffset, 0); builder.addDecoration(type_id, spv::DecorationBlock); if (range_size != 1) { if (range_size == ~0u) type_id = builder.makeRuntimeArray(type_id); else type_id = builder.makeArrayType(type_id, builder.makeUintConstant(range_size), 0); } if (raw_decl.width == RawWidth::B16) builder.addCapability(spv::CapabilityUniformAndStorageBuffer16BitAccess); else if (raw_decl.width == RawWidth::B8) { builder.addExtension("SPV_KHR_8bit_storage"); builder.addCapability(spv::CapabilityUniformAndStorageBuffer8BitAccess); } return create_variable(spv::StorageClassUniform, type_id, name.empty() ? nullptr : name.c_str()); } spv::Id Converter::Impl::create_raw_ssbo_variable(const RawDeclaration &raw_decl, uint32_t range_size, const String &name) { spv::Id type_id = build_ssbo_runtime_array_type(*this, raw_decl.type, raw_width_to_bits(raw_decl.width), raw_vecsize_to_vecsize(raw_decl.vecsize), range_size, name + "SSBO"); if (raw_decl.width == RawWidth::B16) builder().addCapability(spv::CapabilityStorageBuffer16BitAccess); else if (raw_decl.width == RawWidth::B8) { builder().addExtension("SPV_KHR_8bit_storage"); builder().addCapability(spv::CapabilityStorageBuffer8BitAccess); } return create_variable(spv::StorageClassStorageBuffer, type_id, name.empty() ? nullptr : name.c_str()); } Vector Converter::Impl::create_raw_ssbo_variable_alias_group( const Vector &raw_decls, uint32_t range_size, const String &name) { Vector group; group.reserve(raw_decls.size()); for (auto &decl : raw_decls) group.push_back({ decl, create_raw_ssbo_variable(decl, range_size, name) }); return group; } Vector Converter::Impl::create_ubo_variable_alias_group( const Vector &raw_decls, uint32_t range_size, const String &name, unsigned cbv_size) { Vector group; group.reserve(raw_decls.size()); for (auto &decl : raw_decls) group.push_back({ decl, create_ubo_variable(decl, range_size, name, cbv_size) }); return group; } static const char *convert_component_type_to_str(DXIL::ComponentType type) { switch (type) { case DXIL::ComponentType::U16: return "U16"; case DXIL::ComponentType::U32: return "U32"; case DXIL::ComponentType::U64: return "U64"; case DXIL::ComponentType::I16: return "I16"; case DXIL::ComponentType::I32: return "I32"; case DXIL::ComponentType::I64: return "I64"; case DXIL::ComponentType::F16: return "F16"; case DXIL::ComponentType::F32: return "F32"; case DXIL::ComponentType::F64: return "F64"; default: return ""; } } spv::Id Converter::Impl::create_bindless_heap_variable(const BindlessInfo &info) { auto itr = std::find_if(bindless_resources.begin(), bindless_resources.end(), [&](const BindlessResource &resource) { return resource.info.type == info.type && resource.info.component == info.component && resource.info.raw_vecsize == info.raw_vecsize && resource.info.kind == info.kind && resource.info.desc_set == info.desc_set && resource.info.format == info.format && resource.info.binding == info.binding && resource.info.uav_read == info.uav_read && resource.info.uav_written == info.uav_written && resource.info.uav_coherent == info.uav_coherent && resource.info.relaxed_precision == info.relaxed_precision && resource.info.aliased == info.aliased && resource.info.counters == info.counters && resource.info.offsets == info.offsets && resource.info.descriptor_type == info.descriptor_type && (!options.extended_non_semantic_info || resource.info.debug.stride == 
               (!options.extended_non_semantic_info || resource.info.debug.stride == info.debug.stride);
    });

    if (itr != bindless_resources.end())
    {
        return itr->var_id;
    }
    else
    {
        BindlessResource resource = {};
        resource.info = info;

        spv::Id type_id = 0;
        auto storage = spv::StorageClassMax;

        switch (info.type)
        {
        case DXIL::ResourceType::SRV:
        {
            if (info.kind == DXIL::ResourceKind::RTAccelerationStructure)
            {
                if (info.descriptor_type == VulkanDescriptorType::SSBO)
                {
                    type_id = build_ssbo_runtime_array_type(*this, RawType::Integer, 32, 2, 1, "RTASHeap");
                    storage = spv::StorageClassStorageBuffer;
                }
                else
                {
                    type_id = builder().makeAccelerationStructureType();
                    type_id = builder().makeRuntimeArray(type_id);
                    storage = spv::StorageClassUniformConstant;
                }
            }
            else if (info.descriptor_type == VulkanDescriptorType::SSBO)
            {
                RawType raw_type = raw_component_type_to_type(info.component);
                unsigned bits = raw_component_type_to_bits(info.component);

                if (info.offsets)
                    type_id = build_ssbo_runtime_array_type(*this, raw_type, 32, 2, 1, "SSBO_Offsets");
                else
                    type_id = build_ssbo_runtime_array_type(*this, raw_type, bits,
                                                            raw_vecsize_to_vecsize(info.raw_vecsize), ~0u, "SSBO");
                storage = spv::StorageClassStorageBuffer;

                if (bits == 16)
                    builder().addCapability(spv::CapabilityStorageBuffer16BitAccess);
                else if (bits == 8)
                {
                    builder().addExtension("SPV_KHR_8bit_storage");
                    builder().addCapability(spv::CapabilityStorageBuffer8BitAccess);
                }
            }
            else
            {
                if (info.component != DXIL::ComponentType::U32 && info.component != DXIL::ComponentType::I32 &&
                    info.component != DXIL::ComponentType::F32)
                {
                    LOGE("Invalid component type for image.\n");
                    return 0;
                }

                spv::Id sampled_type_id = get_type_id(info.component, 1, 1);
                type_id = builder().makeImageType(sampled_type_id, image_dimension_from_resource_kind(info.kind), false,
                                                  image_dimension_is_arrayed(info.kind),
                                                  image_dimension_is_multisampled(info.kind), 1, spv::ImageFormatUnknown);
                type_id = builder().makeRuntimeArray(type_id);
                storage = spv::StorageClassUniformConstant;
            }
            break;
        }

        case DXIL::ResourceType::UAV:
        {
            if (info.counters)
            {
                if (info.kind == DXIL::ResourceKind::Invalid)
                {
                    auto &mapping = options.meta_descriptor_mappings[int(MetaDescriptor::RawDescriptorHeapView)];
                    if (mapping.kind == MetaDescriptorKind::UBOContainingBDA)
                    {
                        // This is faster access than the normal SSBO descriptor path.
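                        // (Presumably because a fixed UBO slot holding the heap's GPU
                        // address is cheaper to load than indexing a runtime-sized
                        // SSBO array just to reach the introspection buffer.)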
                        if (info.desc_set != mapping.desc_set || info.binding != mapping.desc_binding)
                            LOGW("Using meta CBV mapping for physical descriptors, but there is a mismatch in requested bindings.\n");
                        if (!emit_descriptor_heap_introspection_buffer())
                            return 0;
                        return instrumentation.descriptor_heap_introspection_var_id;
                    }
                    else
                    {
                        spv::Id uint_type = builder().makeUintType(32);
                        spv::Id uvec2_type = builder().makeVectorType(uint_type, 2);
                        spv::Id runtime_array_type_id = builder().makeRuntimeArray(uvec2_type);
                        builder().addDecoration(runtime_array_type_id, spv::DecorationArrayStride, sizeof(uint64_t));
                        type_id = get_struct_type({ runtime_array_type_id }, 0, "AtomicCounters");
                        builder().addDecoration(type_id, spv::DecorationBlock);
                        builder().addMemberName(type_id, 0, "counters");
                        builder().addMemberDecoration(type_id, 0, spv::DecorationOffset, 0);
                        builder().addMemberDecoration(type_id, 0, spv::DecorationNonWritable);
                    }
                }
                else
                {
                    spv::Id uint_type = builder().makeUintType(32);
                    type_id = get_struct_type({ uint_type }, 0, "AtomicCounters");
                    builder().addDecoration(type_id, spv::DecorationBlock);
                    builder().addMemberName(type_id, 0, "counter");
                    builder().addMemberDecoration(type_id, 0, spv::DecorationOffset, 0);
                    type_id = builder().makeRuntimeArray(type_id);
                }
                storage = spv::StorageClassStorageBuffer;
            }
            else if (info.descriptor_type == VulkanDescriptorType::SSBO)
            {
                RawType raw_type = raw_component_type_to_type(info.component);
                unsigned bits = raw_component_type_to_bits(info.component);
                type_id = build_ssbo_runtime_array_type(*this, raw_type, bits,
                                                        raw_vecsize_to_vecsize(info.raw_vecsize), ~0u, "SSBO");
                storage = spv::StorageClassStorageBuffer;

                if (bits == 16)
                    builder().addCapability(spv::CapabilityStorageBuffer16BitAccess);
                else if (bits == 8)
                {
                    builder().addExtension("SPV_KHR_8bit_storage");
                    builder().addCapability(spv::CapabilityStorageBuffer8BitAccess);
                }
            }
            else
            {
                if (info.component != DXIL::ComponentType::U32 && info.component != DXIL::ComponentType::I32 &&
                    info.component != DXIL::ComponentType::F32 && info.component != DXIL::ComponentType::U64)
                {
                    LOGE("Invalid component type for image.\n");
                    return 0;
                }

                spv::Id sampled_type_id = get_type_id(info.component, 1, 1);
                type_id = builder().makeImageType(sampled_type_id, image_dimension_from_resource_kind(info.kind), false,
                                                  image_dimension_is_arrayed(info.kind),
                                                  image_dimension_is_multisampled(info.kind), 2, info.format);
                type_id = builder().makeRuntimeArray(type_id);
                storage = spv::StorageClassUniformConstant;
            }
            break;
        }

        case DXIL::ResourceType::Sampler:
            type_id = builder().makeSamplerType();
            type_id = builder().makeRuntimeArray(type_id);
            storage = spv::StorageClassUniformConstant;
            break;

        case DXIL::ResourceType::CBV:
        {
            RawType raw_type = raw_component_type_to_type(info.component);
            unsigned bits = raw_component_type_to_bits(info.component);
            unsigned vecsize = raw_vecsize_to_vecsize(info.raw_vecsize);
            type_id = raw_type == RawType::Float ? builder().makeFloatType(bits) : builder().makeUintType(bits);
            if (vecsize > 1)
                type_id = builder().makeVectorType(type_id, vecsize);

            unsigned element_size = (bits / 8) * vecsize;
            unsigned num_elements = 0x10000 / element_size;

            type_id = builder().makeArrayType(type_id, builder().makeUintConstant(num_elements), element_size);
            builder().addDecoration(type_id, spv::DecorationArrayStride, element_size);
            type_id = get_struct_type({ type_id }, 0, "BindlessCBV");
            builder().addDecoration(type_id, spv::DecorationBlock);

            if (options.bindless_cbv_ssbo_emulation)
                builder().addMemberDecoration(type_id, 0, spv::DecorationNonWritable);
            builder().addMemberDecoration(type_id, 0, spv::DecorationOffset, 0);
            type_id = builder().makeRuntimeArray(type_id);
            storage = options.bindless_cbv_ssbo_emulation ? spv::StorageClassStorageBuffer : spv::StorageClassUniform;

            if (bits == 16)
            {
                if (options.bindless_cbv_ssbo_emulation)
                    builder().addCapability(spv::CapabilityStorageBuffer16BitAccess);
                else
                    builder().addCapability(spv::CapabilityUniformAndStorageBuffer16BitAccess);
            }
            else if (bits == 8)
            {
                builder().addExtension("SPV_KHR_8bit_storage");
                if (options.bindless_cbv_ssbo_emulation)
                    builder().addCapability(spv::CapabilityStorageBuffer8BitAccess);
                else
                    builder().addCapability(spv::CapabilityUniformAndStorageBuffer8BitAccess);
            }
            break;
        }

        default:
            return 0;
        }

        builder().addExtension("SPV_EXT_descriptor_indexing");
        builder().addCapability(spv::CapabilityRuntimeDescriptorArrayEXT);
        resource.var_id = create_variable(storage, type_id);

        if (options.extended_non_semantic_info)
        {
            String name;
            switch (info.type)
            {
            case DXIL::ResourceType::SRV: name = "SRV"; break;
            case DXIL::ResourceType::UAV: name = "UAV"; break;
            case DXIL::ResourceType::CBV: name = "CBV"; break;
            case DXIL::ResourceType::Sampler: name = "Sampler"; break;
            default: break;
            }

            const char *component_type_name = convert_component_type_to_str(info.component);

            switch (info.kind)
            {
            case DXIL::ResourceKind::RawBuffer:
                name += "_ByteAddressBuffer";
                name += "_vec";
                name += std::to_string(raw_vecsize_to_vecsize(info.raw_vecsize)).c_str();
                builder().addName(builder().getContainedTypeId(type_id), (name + "_Block").c_str());
                break;

            case DXIL::ResourceKind::StructuredBuffer:
                name += "_StructuredBuffer_";
                name += std::to_string(info.debug.stride).c_str();
                name += "_vec";
                name += std::to_string(raw_vecsize_to_vecsize(info.raw_vecsize)).c_str();
                builder().addName(builder().getContainedTypeId(type_id), (name + "_Block").c_str());
                break;

            case DXIL::ResourceKind::CBuffer:
                builder().addName(builder().getContainedTypeId(type_id), (name + "_Block").c_str());
                break;

            case DXIL::ResourceKind::TypedBuffer: name += "_TypedBuffer_"; name += component_type_name; break;
            case DXIL::ResourceKind::Texture1D: name += "_1D_"; name += component_type_name; break;
            case DXIL::ResourceKind::Texture1DArray: name += "_1DArray_"; name += component_type_name; break;
            case DXIL::ResourceKind::Texture2D: name += "_2D_"; name += component_type_name; break;
            case DXIL::ResourceKind::Texture2DArray: name += "_2DArray_"; name += component_type_name; break;
            case DXIL::ResourceKind::Texture2DMS: name += "_2DMS_"; name += component_type_name; break;
            case DXIL::ResourceKind::Texture2DMSArray: name += "_2DMSArray_"; name += component_type_name; break;
            case DXIL::ResourceKind::TextureCube: name += "_Cube_"; name += component_type_name; break;
            case DXIL::ResourceKind::TextureCubeArray: name += "_CubeArray_"; name += component_type_name; break;
            case DXIL::ResourceKind::Texture3D:
                name += "_3D_";
                name += component_type_name;
                break;
            default:
                break;
            }

            builder().addName(resource.var_id, name.c_str());
        }

        auto &meta = handle_to_resource_meta[resource.var_id];
        meta = {};
        meta.kind = info.kind;
        meta.component_type = info.component;
        meta.raw_component_vecsize = info.raw_vecsize;
        meta.var_id = resource.var_id;
        meta.storage = storage;

        builder().addDecoration(resource.var_id, spv::DecorationDescriptorSet, info.desc_set);
        builder().addDecoration(resource.var_id, spv::DecorationBinding, info.binding);

        if (info.relaxed_precision)
        {
            builder().addDecoration(resource.var_id, spv::DecorationRelaxedPrecision);

            // Signal the intended component type.
            switch (meta.component_type)
            {
            case DXIL::ComponentType::F32:
                meta.component_type = DXIL::ComponentType::F16;
                break;
            case DXIL::ComponentType::I32:
                meta.component_type = DXIL::ComponentType::I16;
                break;
            case DXIL::ComponentType::U32:
                meta.component_type = DXIL::ComponentType::U16;
                break;
            default:
                break;
            }
        }

        if (info.type == DXIL::ResourceType::UAV && !info.counters)
        {
            if (!info.uav_read)
                builder().addDecoration(resource.var_id, spv::DecorationNonReadable);
            if (!info.uav_written)
                builder().addDecoration(resource.var_id, spv::DecorationNonWritable);
            if (info.uav_coherent && execution_mode_meta.memory_model == spv::MemoryModelGLSL450)
                builder().addDecoration(resource.var_id, spv::DecorationCoherent);
        }
        else if (info.counters && info.kind == DXIL::ResourceKind::Invalid)
        {
            builder().addDecoration(resource.var_id, spv::DecorationAliasedPointer);
        }
        else if (info.type == DXIL::ResourceType::SRV && info.descriptor_type == VulkanDescriptorType::SSBO)
        {
            builder().addDecoration(resource.var_id, spv::DecorationNonWritable);
            builder().addDecoration(resource.var_id, spv::DecorationRestrict);
        }

        // The default in Vulkan environment is Restrict.
        if (info.aliased && info.type == DXIL::ResourceType::UAV)
            builder().addDecoration(resource.var_id, spv::DecorationAliased);

        bindless_resources.push_back(resource);
        return resource.var_id;
    }
}

Converter::Impl::ResourceVariableMeta Converter::Impl::get_resource_variable_meta(const llvm::MDNode *resource) const
{
    ResourceVariableMeta meta = {};
    if (!resource)
        return meta;

    if (const auto *variable = llvm::dyn_cast<llvm::ValueAsMetadata>(resource->getOperand(1)))
    {
        const llvm::Value *val = variable->getValue();
        const auto *global = llvm::dyn_cast<llvm::GlobalVariable>(val);

        // It's possible that the variable is a constexpr bitcast, so resolve those ...
        while (!global && val)
        {
            auto *constexpr_op = llvm::dyn_cast<llvm::ConstantExpr>(val);
            val = nullptr;
            if (constexpr_op && constexpr_op->getOpcode() == llvm::UnaryOperator::BitCast)
            {
                val = constexpr_op->getOperand(0);
                global = llvm::dyn_cast<llvm::GlobalVariable>(val);
            }
        }

        if (global)
        {
            meta.is_lib_variable = true;
            meta.is_active = llvm_active_global_resource_variables.count(global) != 0;
            return meta;
        }
    }

    meta.is_active = true;
    return meta;
}

void Converter::Impl::register_resource_meta_reference(const llvm::MDOperand &operand, DXIL::ResourceType type, unsigned index)
{
    // In RT shaders, apps will load dummy structs from global variables.
    // Here we get the chance to redirect them towards the resource meta declaration.
    if (operand)
    {
        auto *value = llvm::cast<llvm::ValueAsMetadata>(operand)->getValue();

        // In lib_6_6, this is somehow a bitcasted pointer expression, sigh ...
        // Drill deep until we actually find the original resource.
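        // (A minimal unwrap loop: peel ConstantExpr bitcasts one layer at a time
        // until the underlying GlobalVariable emerges, or bail on anything else.)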
        while (auto *cexpr = llvm::dyn_cast<llvm::ConstantExpr>(value))
        {
            if (cexpr->getOpcode() == llvm::Instruction::BitCast)
                value = cexpr->getOperand(0);
            else
                break;
        }

        auto *global_variable = llvm::dyn_cast<llvm::GlobalVariable>(value);
        if (global_variable)
            llvm_global_variable_to_resource_mapping[global_variable] = { type, index, nullptr, global_variable, false };
    }
}

bool Converter::Impl::emit_resources_global_mapping(DXIL::ResourceType type, const llvm::MDNode *node)
{
    unsigned num_resources = node->getNumOperands();
    for (unsigned i = 0; i < num_resources; i++)
    {
        auto *resource = llvm::cast<llvm::MDNode>(node->getOperand(i));
        unsigned index = get_constant_metadata<unsigned>(resource, 0);

        if (type == DXIL::ResourceType::UAV)
        {
            unsigned bind_space = get_constant_metadata<unsigned>(resource, 3);
            unsigned bind_register = get_constant_metadata<unsigned>(resource, 4);
            auto resource_kind = static_cast<DXIL::ResourceKind>(get_constant_metadata<unsigned>(resource, 6));

            if (bind_space == AgsUAVMagicRegisterSpace && resource_kind == DXIL::ResourceKind::RawBuffer)
            {
                ags.uav_magic_resource_type_index = index;
            }
            else if (options.nvapi.enabled && options.nvapi.register_index == bind_register &&
                     options.nvapi.register_space == bind_space &&
                     resource_kind == DXIL::ResourceKind::StructuredBuffer)
            {
                nvapi.uav_magic_resource_type_index = index;
            }
        }

        register_resource_meta_reference(resource->getOperand(1), type, index);
    }
    return true;
}

spv::Id Converter::Impl::get_physical_pointer_block_type(spv::Id base_type_id, const PhysicalPointerMeta &meta)
{
    auto itr = std::find_if(physical_pointer_entries.begin(), physical_pointer_entries.end(),
                            [&](const PhysicalPointerEntry &entry) {
                                return entry.meta.coherent == meta.coherent &&
                                       entry.meta.nonreadable == meta.nonreadable &&
                                       entry.meta.nonwritable == meta.nonwritable &&
                                       entry.meta.size == meta.size && entry.meta.stride == meta.stride &&
                                       entry.base_type_id == base_type_id;
                            });
    if (itr != physical_pointer_entries.end())
        return itr->ptr_type_id;

    int vecsize = builder().getNumTypeComponents(base_type_id);
    int width = builder().getScalarTypeWidth(base_type_id);
    spv::Op op = builder().getTypeClass(base_type_id);
    if (op == spv::OpTypeVector)
        op = builder().getTypeClass(builder().getScalarTypeId(base_type_id));

    String type = "PhysicalPointer";

    switch (op)
    {
    case spv::OpTypeFloat:
        if (width == 16)
            type += "Half";
        else if (width == 32)
            type += "Float";
        else if (width == 64)
            type += "Double";
        break;

    case spv::OpTypeInt:
        if (width == 16)
            type += "Ushort";
        else if (width == 32)
            type += "Uint";
        else if (width == 64)
            type += "Uint64";
        break;

    default:
        break;
    }

    if (vecsize > 1)
        type += std::to_string(vecsize).c_str();
    if (meta.nonwritable)
        type += "NonWrite";
    if (meta.nonreadable)
        type += "NonRead";
    if (meta.coherent)
        type += "Coherent";

    spv::Id type_id = base_type_id;
    if (meta.stride > 0)
    {
        if (meta.size == 0)
        {
            type_id = builder().makeRuntimeArray(type_id);
            type += "Array";
        }
        else
        {
            type_id = builder().makeArrayType(type_id, builder().makeUintConstant(meta.size / meta.stride), meta.stride);
            type += "CBVArray";
        }
        builder().addDecoration(type_id, spv::DecorationArrayStride, meta.stride);
    }

    spv::Id block_type_id = builder().makeStructType({ type_id }, type.c_str());
    builder().addMemberDecoration(block_type_id, 0, spv::DecorationOffset, 0);
    builder().addMemberName(block_type_id, 0, "value");
    builder().addDecoration(block_type_id, spv::DecorationBlock);
    if (meta.nonwritable)
        builder().addMemberDecoration(block_type_id, 0, spv::DecorationNonWritable);
    if (meta.nonreadable)
        builder().addMemberDecoration(block_type_id, 0, spv::DecorationNonReadable);
    if (meta.coherent && execution_mode_meta.memory_model == spv::MemoryModelGLSL450)
        builder().addMemberDecoration(block_type_id, 0, spv::DecorationCoherent);

    spv::Id ptr_type_id = builder().makePointer(spv::StorageClassPhysicalStorageBuffer, block_type_id);

    PhysicalPointerEntry new_entry = {};
    new_entry.ptr_type_id = ptr_type_id;
    new_entry.base_type_id = base_type_id;
    new_entry.meta = meta;
    physical_pointer_entries.push_back(new_entry);
    return ptr_type_id;
}

static bool component_type_is_16bit(DXIL::ComponentType type)
{
    switch (type)
    {
    case DXIL::ComponentType::F16:
    case DXIL::ComponentType::I16:
    case DXIL::ComponentType::U16:
        return true;
    default:
        return false;
    }
}

bool Converter::Impl::analyze_aliased_access(const AccessTracking &tracking, VulkanDescriptorType descriptor_type,
                                             AliasedAccess &aliased_access) const
{
    bool raw_access_16bit = false;
    bool raw_access_64bit = false;

    for (int type_ = 0; type_ < int(RawType::Count); type_++)
    {
        for (int width_ = 0; width_ < int(RawWidth::Count); width_++)
        {
            auto width = RawWidth(width_);
            if (width == RawWidth::B16 && !execution_mode_meta.native_16bit_operations)
                continue;

            for (int vecsize_ = 0; vecsize_ < int(RawVecSize::Count); vecsize_++)
            {
                auto vecsize = RawVecSize(vecsize_);
                auto type = RawType(type_);

                // Non-native 16-bit SSBOs are declared as 32-bit, so avoid false aliases.
                bool has_decl = tracking.raw_access_buffer_declarations[type_][width_][vecsize_];
                if (!has_decl && RawWidth(width) == RawWidth::B32 && !execution_mode_meta.native_16bit_operations)
                    has_decl = tracking.raw_access_buffer_declarations[type_][unsigned(RawWidth::B16)][vecsize_];

                if (has_decl)
                {
                    if (width == RawWidth::B16)
                        raw_access_16bit = true;
                    else if (width == RawWidth::B64)
                        raw_access_64bit = true;
                    aliased_access.raw_declarations.push_back({ type, width, vecsize });
                    aliased_access.primary_component_type = raw_width_to_component_type(type, width);
                    aliased_access.primary_raw_vecsize = vecsize;
                }
            }
        }
    }

    if (raw_access_16bit && descriptor_type != VulkanDescriptorType::SSBO &&
        descriptor_type != VulkanDescriptorType::UBO &&
        descriptor_type != VulkanDescriptorType::BufferDeviceAddress)
    {
        LOGE("Raw 16-bit load-store was used, which must be implemented with SSBO, UBO or BDA.\n");
        return false;
    }

    if (raw_access_64bit && descriptor_type != VulkanDescriptorType::SSBO &&
        descriptor_type != VulkanDescriptorType::UBO &&
        descriptor_type != VulkanDescriptorType::BufferDeviceAddress)
    {
        LOGE("Raw 64-bit load-store was used, which must be implemented with SSBO, UBO or BDA.\n");
        return false;
    }

    // Only SSBO and UBO can be redeclared with different types.
    // Typed descriptors are always scalar.
    aliased_access.requires_alias_decoration =
        (descriptor_type == VulkanDescriptorType::SSBO || descriptor_type == VulkanDescriptorType::UBO) &&
        aliased_access.raw_declarations.size() > 1;

    // If we only emit one 16-bit or 64-bit SSBO/UBO, we need to override the component type of that meta declaration.
    aliased_access.override_primary_component_types =
        (descriptor_type == VulkanDescriptorType::SSBO || descriptor_type == VulkanDescriptorType::UBO) &&
        aliased_access.raw_declarations.size() == 1;

    // If the SSBO is never actually accessed (UAV counters for example), fudge the default type.
    if (descriptor_type == VulkanDescriptorType::SSBO && aliased_access.raw_declarations.empty())
        aliased_access.raw_declarations.push_back({ RawType::Integer, RawWidth::B32, RawVecSize::V1 });

    // If the CBV is never actually accessed, fudge the default legacy CBV type.
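    // (Legacy cbuffers are laid out as 16-byte float4 rows, hence the f32 / vec4 default below.)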
    if (descriptor_type == VulkanDescriptorType::UBO && aliased_access.raw_declarations.empty())
        aliased_access.raw_declarations.push_back({ RawType::Float, RawWidth::B32, RawVecSize::V4 });

    // Safeguard against unused variables where we never end up setting any primary component type.
    if ((descriptor_type == VulkanDescriptorType::SSBO || descriptor_type == VulkanDescriptorType::UBO) &&
        aliased_access.raw_declarations.size() == 1)
    {
        aliased_access.primary_component_type = raw_width_to_component_type(
            aliased_access.raw_declarations.front().type, aliased_access.raw_declarations.front().width);
        aliased_access.primary_raw_vecsize = aliased_access.raw_declarations.front().vecsize;
        aliased_access.override_primary_component_types = true;
    }

    return true;
}

void Converter::Impl::emit_non_semantic_debug_info(const NonSemanticDebugInfo &info)
{
    auto &b = spirv_module.get_builder();
    b.addExtension("SPV_KHR_non_semantic_info");
    spv::Id ext = b.import("NonSemantic.dxil-spirv.signature");

    auto *u8_data = static_cast<const uint8_t *>(info.data);

    // If the root sig is massive (likely because it came from a full DXIL blob or something),
    // need to dump in multiple stages due to opcode limits.
    for (size_t i = 0; i < info.size; i += 64 * 1024)
    {
        size_t to_dump = std::min<size_t>(info.size - i, 64 * 1024);

        auto inst = std::make_unique<spv::Instruction>(b.getUniqueId(), b.makeVoidType(), spv::OpExtInst);
        inst->addIdOperand(ext);
        inst->addImmediateOperand(1);
        inst->addIdOperand(b.addString(info.tag));

        // Emit the bulk as packed 32-bit words, then the unaligned tail byte by byte.
        for (size_t j = 0; j < (to_dump & ~size_t(3)); j += 4)
        {
            uint32_t v;
            memcpy(&v, u8_data + i + j, sizeof(v));
            inst->addIdOperand(b.makeUintConstant(v));
        }

        for (size_t j = to_dump & ~size_t(3); j < to_dump; j++)
            inst->addIdOperand(b.makeUint8Constant(u8_data[i + j]));

        b.addExternal(std::move(inst));
    }
}

void Converter::Impl::emit_root_parameter_index_from_push_index(const char *tag, uint32_t index, uint32_t size, bool bda)
{
    bool descriptor_packing = (index & 0x80000000) != 0;
    uint32_t parameter_index = UINT32_MAX;
    uint32_t effective_offset = 0;

    if (descriptor_packing)
    {
        for (auto &mapping : root_parameter_mappings)
        {
            if (mapping.offset == index)
            {
                parameter_index = mapping.root_parameter_index;
                break;
            }
        }
    }
    else
    {
        effective_offset = bda ? index * 8 : (index * 4 + root_descriptor_count * 8);
        for (auto &mapping : root_parameter_mappings)
        {
            if (mapping.offset == effective_offset)
            {
                parameter_index = mapping.root_parameter_index;
                break;
            }
        }
    }

    if (parameter_index == UINT32_MAX)
        return;

    // Avoid lots of spam.
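    // (Each root parameter index is reported at most once, tracked by a 64-bit emit mask.)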
    if ((1ull << parameter_index) & root_parameter_emit_mask)
        return;
    root_parameter_emit_mask |= 1ull << parameter_index;

    auto &b = spirv_module.get_builder();
    b.addExtension("SPV_KHR_non_semantic_info");
    spv::Id ext = b.import("NonSemantic.dxil-spirv.signature");

    auto inst = std::make_unique<spv::Instruction>(b.getUniqueId(), b.makeVoidType(), spv::OpExtInst);
    inst->addIdOperand(ext);
    inst->addImmediateOperand(0);
    inst->addIdOperand(b.addString(tag));
    inst->addIdOperand(b.makeUintConstant(parameter_index));

    if (descriptor_packing)
    {
        inst->addIdOperand(b.makeUintConstant((index >> 24) & 0x7f));
        inst->addIdOperand(b.makeUintConstant(index & 0xffffff));
    }
    else
    {
        inst->addIdOperand(b.makeUintConstant(effective_offset));
        inst->addIdOperand(b.makeUintConstant(size));
    }

    b.addExternal(std::move(inst));
}

bool Converter::Impl::emit_srvs(const llvm::MDNode *srvs, const llvm::MDNode *refl)
{
    auto &builder = spirv_module.get_builder();
    unsigned num_srvs = srvs->getNumOperands();

    for (unsigned i = 0; i < num_srvs; i++)
    {
        auto *srv = llvm::cast<llvm::MDNode>(srvs->getOperand(i));
        auto var_meta = get_resource_variable_meta(srv);
        if (!var_meta.is_active)
            continue;

        unsigned index = get_constant_metadata<unsigned>(srv, 0);
        auto name = get_resource_name_metadata(srv, refl);
        unsigned bind_space = get_constant_metadata<unsigned>(srv, 3);
        unsigned bind_register = get_constant_metadata<unsigned>(srv, 4);
        unsigned range_size = get_constant_metadata<unsigned>(srv, 5);

        if (bind_register == UINT32_MAX && bind_space == UINT32_MAX)
        {
            // This seems to be possible in RT shaders when explicit register() is missing?
            LOGE("Nonsensical SRV binding detected.\n");
            return false;
        }

        auto resource_kind = static_cast<DXIL::ResourceKind>(get_constant_metadata<unsigned>(srv, 6));

        llvm::MDNode *tags = nullptr;
        if (srv->getNumOperands() >= 9 && srv->getOperand(8))
            tags = llvm::dyn_cast<llvm::MDNode>(srv->getOperand(8));

        auto actual_component_type = DXIL::ComponentType::U32;
        auto effective_component_type = actual_component_type;
        unsigned stride = 0;

        if (tags && get_constant_metadata<unsigned>(tags, 0) == 0)
        {
            // Sampled format.
            actual_component_type =
                normalize_component_type(static_cast<DXIL::ComponentType>(get_constant_metadata<unsigned>(tags, 1)));
            effective_component_type = get_effective_typed_resource_type(actual_component_type);
        }
        else
        {
            // Structured/Raw buffers, just use uint for good measure, we'll bitcast as needed.
            // Field 1 is stride, but we don't care about that unless we will support an SSBO path.
            if (tags)
                stride = get_constant_metadata<unsigned>(tags, 1);
        }

        unsigned alignment = resource_kind == DXIL::ResourceKind::RawBuffer ? 16 : (stride & -int(stride));

        DescriptorTableEntry local_table_entry = {};
        int local_root_signature_entry = get_local_root_signature_entry(
            ResourceClass::SRV, bind_space, bind_register, local_table_entry);

        bool need_resource_remapping = local_root_signature_entry < 0 ||
                                       local_root_signature[local_root_signature_entry].type == LocalRootSignatureType::Table;

        D3DBinding d3d_binding = {
            get_remapping_stage(execution_model), resource_kind, index, bind_space, bind_register, range_size, alignment,
        };
        VulkanSRVBinding vulkan_binding = { { bind_space, bind_register }, {} };

        if (need_resource_remapping && resource_mapping_iface &&
            !resource_mapping_iface->remap_srv(d3d_binding, vulkan_binding))
        {
            // We may be rejected if the unbound range has 1 non-bindless descriptor.
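            // Retry once, treating the range as a single descriptor rather than an
            // unbounded array; only if that also fails do we give up.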
            bool retry = d3d_binding.range_size == UINT32_MAX;
            if (retry)
            {
                d3d_binding.range_size = 1;
                range_size = 1;
            }

            if (!retry || !resource_mapping_iface->remap_srv(d3d_binding, vulkan_binding))
            {
                LOGE("Failed to remap SRV %u:%u.\n", bind_space, bind_register);
                return false;
            }
        }

        auto &access_meta = srv_access_tracking[index];

        AliasedAccess aliased_access;
        if (!analyze_aliased_access(access_meta,
                                    need_resource_remapping ? vulkan_binding.buffer_binding.descriptor_type :
                                                              VulkanDescriptorType::BufferDeviceAddress,
                                    aliased_access))
        {
            return false;
        }

        if (range_size != 1 && resource_kind != DXIL::ResourceKind::RTAccelerationStructure)
        {
            if (range_size == ~0u)
            {
                builder.addExtension("SPV_EXT_descriptor_indexing");
                builder.addCapability(spv::CapabilityRuntimeDescriptorArrayEXT);
            }

            if ((resource_kind == DXIL::ResourceKind::StructuredBuffer || resource_kind == DXIL::ResourceKind::RawBuffer) &&
                vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::SSBO)
            {
                builder.addCapability(spv::CapabilityStorageBufferArrayDynamicIndexing);
            }
            else if (resource_kind == DXIL::ResourceKind::StructuredBuffer ||
                     resource_kind == DXIL::ResourceKind::RawBuffer ||
                     resource_kind == DXIL::ResourceKind::TypedBuffer)
            {
                builder.addExtension("SPV_EXT_descriptor_indexing");
                builder.addCapability(spv::CapabilityUniformTexelBufferArrayDynamicIndexingEXT);
            }
            else
                builder.addCapability(spv::CapabilitySampledImageArrayDynamicIndexing);
        }

        srv_index_to_reference.resize(std::max(srv_index_to_reference.size(), size_t(index + 1)));
        srv_index_to_offset.resize(std::max(srv_index_to_offset.size(), size_t(index + 1)));

        if (!get_ssbo_offset_buffer_id(srv_index_to_offset[index], vulkan_binding.buffer_binding,
                                       vulkan_binding.offset_binding, resource_kind, alignment))
            return false;

        BindlessInfo bindless_info = {};
        bindless_info.type = DXIL::ResourceType::SRV;
        bindless_info.component = effective_component_type;
        bindless_info.kind = resource_kind;
        bindless_info.desc_set = vulkan_binding.buffer_binding.descriptor_set;
        bindless_info.binding = vulkan_binding.buffer_binding.binding;
        bindless_info.descriptor_type = vulkan_binding.buffer_binding.descriptor_type;
        bindless_info.relaxed_precision =
            actual_component_type != effective_component_type && component_type_is_16bit(actual_component_type);
        bindless_info.debug.stride = stride;

        if (local_root_signature_entry >= 0)
        {
            auto &entry = local_root_signature[local_root_signature_entry];
            if (entry.type == LocalRootSignatureType::Table)
            {
                if (!vulkan_binding.buffer_binding.bindless.use_heap)
                {
                    LOGE("Table SBT entries must be bindless.\n");
                    return false;
                }

                if (!var_meta.is_lib_variable)
                {
                    LOGE("Local root signature requires global lib variables.\n");
                    return false;
                }

                uint32_t heap_offset = local_table_entry.offset_in_heap;
                heap_offset += bind_register - local_table_entry.register_index;

                auto &ref = srv_index_to_reference[index];

                if (aliased_access.requires_alias_decoration)
                {
                    ref.var_alias_group = create_bindless_heap_variable_alias_group(
                        bindless_info, aliased_access.raw_declarations);
                }
                else if (aliased_access.override_primary_component_types)
                {
                    auto tmp_info = bindless_info;
                    tmp_info.component = aliased_access.primary_component_type;
                    tmp_info.raw_vecsize = aliased_access.primary_raw_vecsize;
                    ref.var_id = create_bindless_heap_variable(tmp_info);
                }
                else
                {
                    ref.var_id = create_bindless_heap_variable(bindless_info);
                }

                ref.aliased = aliased_access.requires_alias_decoration;
                ref.base_offset = heap_offset;
                ref.base_resource_is_array = range_size != 1;
                ref.stride = stride;
                ref.bindless = true;
				ref.local_root_signature_entry = local_root_signature_entry;
				ref.resource_kind = resource_kind;
			}
			else
			{
				// Otherwise, we simply refer to the SBT directly to obtain a pointer.
				if (resource_kind != DXIL::ResourceKind::RawBuffer &&
				    resource_kind != DXIL::ResourceKind::StructuredBuffer &&
				    resource_kind != DXIL::ResourceKind::RTAccelerationStructure)
				{
					LOGE("SRV SBT root descriptors must be raw buffers, structured buffers or RTAS.\n");
					return false;
				}

				auto &ref = srv_index_to_reference[index];
				ref.var_id = shader_record_buffer_id;
				ref.stride = stride;
				ref.local_root_signature_entry = local_root_signature_entry;
				ref.resource_kind = resource_kind;

				if (range_size != 1)
				{
					LOGE("Cannot use descriptor array for root descriptors.\n");
					return false;
				}
			}
		}
		else if (vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::BufferDeviceAddress)
		{
			if (resource_kind != DXIL::ResourceKind::RawBuffer &&
			    resource_kind != DXIL::ResourceKind::StructuredBuffer &&
			    resource_kind != DXIL::ResourceKind::RTAccelerationStructure)
			{
				LOGE("BDA root descriptors must be raw buffers, structured buffers or RTAS.\n");
				return false;
			}

			auto &ref = srv_index_to_reference[index];
			ref.var_id = root_constant_id;
			ref.root_descriptor = true;
			ref.push_constant_member = vulkan_binding.buffer_binding.root_constant_index;
			ref.stride = stride;
			ref.resource_kind = resource_kind;

			if (options.extended_non_semantic_info)
				emit_root_parameter_index_from_push_index("SRV", ref.push_constant_member, 8, true);

			if (range_size != 1)
			{
				LOGE("Cannot use descriptor array for root descriptors.\n");
				return false;
			}
		}
		else if (vulkan_binding.buffer_binding.bindless.use_heap)
		{
			// DXIL already applies the t# register offset to any dynamic index, so counteract that here.
			// The exception is with lib_* where we access resources by variable, not through
			// createResource() >_____<.
			uint32_t heap_offset = vulkan_binding.buffer_binding.bindless.heap_root_offset;
			if (range_size != 1 && !var_meta.is_lib_variable)
				heap_offset -= bind_register;

			auto &ref = srv_index_to_reference[index];

			if (aliased_access.requires_alias_decoration)
			{
				ref.var_alias_group =
				    create_bindless_heap_variable_alias_group(bindless_info, aliased_access.raw_declarations);
			}
			else if (aliased_access.override_primary_component_types)
			{
				auto tmp_info = bindless_info;
				tmp_info.component = aliased_access.primary_component_type;
				tmp_info.raw_vecsize = aliased_access.primary_raw_vecsize;
				ref.var_id = create_bindless_heap_variable(tmp_info);
			}
			else
			{
				ref.var_id = create_bindless_heap_variable(bindless_info);
			}

			ref.aliased = aliased_access.requires_alias_decoration;
			ref.push_constant_member = vulkan_binding.buffer_binding.root_constant_index + root_descriptor_count;
			ref.base_offset = heap_offset;
			ref.stride = stride;
			ref.bindless = true;
			ref.base_resource_is_array = range_size != 1;
			ref.resource_kind = resource_kind;

			if (options.extended_non_semantic_info)
				emit_root_parameter_index_from_push_index("ResourceTable",
				                                          vulkan_binding.buffer_binding.root_constant_index, 4, false);
		}
		else
		{
			auto sampled_type_id = get_type_id(effective_component_type, 1, 1);
			spv::Id type_id = 0;
			auto storage = spv::StorageClassUniformConstant;

			if (resource_kind == DXIL::ResourceKind::RTAccelerationStructure)
			{
				type_id = builder.makeAccelerationStructureType();
			}
			else if (vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::SSBO)
			{
				storage = spv::StorageClassStorageBuffer;
				// Defer typing the SSBOs.
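				// The SSBO's element type is resolved later from the observed access
				// patterns (see the create_raw_ssbo_variable() call below), so no type
				// is created here and type_id deliberately stays 0.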
			}
			else if (vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::InputAttachment)
			{
				if (execution_model != spv::ExecutionModelFragment)
				{
					LOGE("InputAttachments can only be used in pixel shaders.\n");
					return false;
				}

				if (range_size != 1)
				{
					LOGE("Cannot bind input attachment to array of descriptors.\n");
					return false;
				}

				if (resource_kind != DXIL::ResourceKind::Texture2D && resource_kind != DXIL::ResourceKind::Texture2DMS)
				{
					LOGE("Can only bind Texture2D and Texture2DMS to input attachments.\n");
					return false;
				}

				type_id = builder.makeImageType(sampled_type_id, spv::DimSubpassData, false, false,
				                                image_dimension_is_multisampled(resource_kind), 2,
				                                spv::ImageFormatUnknown);
			}
			else
			{
				type_id = builder.makeImageType(sampled_type_id, image_dimension_from_resource_kind(resource_kind),
				                                false, image_dimension_is_arrayed(resource_kind),
				                                image_dimension_is_multisampled(resource_kind), 1,
				                                spv::ImageFormatUnknown);

				if (range_size != 1)
				{
					if (range_size == ~0u)
						type_id = builder.makeRuntimeArray(type_id);
					else
						type_id = builder.makeArrayType(type_id, builder.makeUintConstant(range_size), 0);
				}
			}

			auto &ref = srv_index_to_reference[index];

			if (type_id)
				ref.var_id = create_variable(storage, type_id, name.empty() ? nullptr : name.c_str());
			else if (aliased_access.requires_alias_decoration)
				ref.var_alias_group = create_raw_ssbo_variable_alias_group(aliased_access.raw_declarations, range_size, name);
			else
			{
				assert(aliased_access.raw_declarations.size() == 1);
				ref.var_id = create_raw_ssbo_variable(aliased_access.raw_declarations.front(), range_size, name);
			}

			if (actual_component_type != effective_component_type && component_type_is_16bit(actual_component_type))
				builder.addDecoration(ref.var_id, spv::DecorationRelaxedPrecision);

			const auto decorate_variable = [&](spv::Id id) {
				builder.addDecoration(id, spv::DecorationDescriptorSet, vulkan_binding.buffer_binding.descriptor_set);
				builder.addDecoration(id, spv::DecorationBinding, vulkan_binding.buffer_binding.binding);

				if (vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::SSBO)
				{
					// Make it crystal clear this is a read-only SSBO which cannot observe changes from other SSBO writes.
					// Do not emit Aliased here even for type aliases
					// since we cannot observe writes from other descriptors anyways.
					builder.addDecoration(id, spv::DecorationNonWritable);
					builder.addDecoration(id, spv::DecorationRestrict);
				}
				else if (vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::InputAttachment &&
				         vulkan_binding.buffer_binding.input_attachment_index != -1u)
				{
					builder.addDecoration(id, spv::DecorationInputAttachmentIndex,
					                      int(vulkan_binding.buffer_binding.input_attachment_index));
				}
			};

			if (ref.var_id)
				decorate_variable(ref.var_id);
			for (auto &var : ref.var_alias_group)
				decorate_variable(var.var_id);

			ref.aliased = aliased_access.requires_alias_decoration;
			ref.base_resource_is_array = range_size != 1;
			ref.stride = stride;
			ref.resource_kind = resource_kind;

			// Counteract any offsets.
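			// For example (hypothetical HLSL): Texture2D T[8] : register(t4) accessed as
			// T[NonUniformResourceIndex(i)] makes DXIL pass 4 + i into createHandle(), so
			// base_offset = -4 turns that back into a plain 0-based array index.
			// The t4 register value here is purely illustrative.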
			if (range_size != 1 && !var_meta.is_lib_variable)
				ref.base_offset = -bind_register;

			if (ref.var_id)
			{
				auto &meta = handle_to_resource_meta[ref.var_id];
				meta = {};
				meta.kind = resource_kind;
				meta.stride = stride;
				meta.var_id = ref.var_id;
				meta.storage = storage;
				meta.component_type = actual_component_type;

				if (aliased_access.override_primary_component_types)
				{
					meta.component_type = aliased_access.primary_component_type;
					meta.raw_component_vecsize = aliased_access.primary_raw_vecsize;
				}
			}

			for (auto &var : ref.var_alias_group)
			{
				auto &meta = handle_to_resource_meta[var.var_id];
				meta = {};
				meta.kind = resource_kind;
				meta.component_type = raw_width_to_component_type(var.declaration.type, var.declaration.width);
				meta.raw_component_vecsize = var.declaration.vecsize;
				meta.stride = stride;
				meta.var_id = var.var_id;
				meta.storage = storage;
			}
		}
	}

	return true;
}

bool Converter::Impl::get_ssbo_offset_buffer_id(spv::Id &buffer_id, const VulkanBinding &buffer_binding,
                                                const VulkanBinding &offset_binding, DXIL::ResourceKind kind,
                                                unsigned alignment)
{
	buffer_id = 0;

	bool is_buffer_type = kind == DXIL::ResourceKind::StructuredBuffer ||
	                      kind == DXIL::ResourceKind::RawBuffer ||
	                      kind == DXIL::ResourceKind::TypedBuffer;
	if (!is_buffer_type)
		return true;

	bool use_offsets = false;

	// If we're emitting an SSBO where we expect small alignment, we'll need to carry forward an "offset".
	if (buffer_binding.descriptor_type == VulkanDescriptorType::SSBO)
	{
		if (kind != DXIL::ResourceKind::TypedBuffer && (alignment & (options.ssbo_alignment - 1)) != 0)
		{
			if (!buffer_binding.bindless.use_heap)
			{
				LOGE("SSBO offset is only supported for bindless SSBOs.\n");
				return false;
			}

			if (offset_binding.bindless.use_heap)
			{
				LOGE("SSBO offset buffer must not be a bindless buffer.\n");
				return false;
			}

			use_offsets = true;
		}
	}
	else if (options.bindless_typed_buffer_offsets && buffer_binding.bindless.use_heap)
	{
		use_offsets = true;
	}

	if (use_offsets)
	{
		BindlessInfo bindless_info = {};
		bindless_info.descriptor_type = VulkanDescriptorType::SSBO;
		bindless_info.type = DXIL::ResourceType::SRV;
		bindless_info.offsets = true;
		bindless_info.desc_set = offset_binding.descriptor_set;
		bindless_info.binding = offset_binding.binding;
		bindless_info.component = DXIL::ComponentType::U32;
		bindless_info.kind = DXIL::ResourceKind::RawBuffer;
		buffer_id = create_bindless_heap_variable(bindless_info);
	}

	return true;
}

bool Converter::Impl::get_uav_image_format(DXIL::ResourceKind resource_kind,
                                           DXIL::ComponentType actual_component_type,
                                           const AccessTracking &access_meta, spv::ImageFormat &format)
{
	if (resource_kind == DXIL::ResourceKind::FeedbackTexture2D ||
	    resource_kind == DXIL::ResourceKind::FeedbackTexture2DArray)
	{
		format = spv::ImageFormatR64ui;
		builder().addExtension("SPV_EXT_shader_image_int64");
		builder().addCapability(spv::CapabilityInt64ImageEXT);
		return true;
	}
	else if (resource_kind != DXIL::ResourceKind::RawBuffer && resource_kind != DXIL::ResourceKind::StructuredBuffer)
	{
		// For any typed resource, we need to check if the resource is being read.
		// To avoid StorageReadWithoutFormat, we emit a format based on the component type.
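		// Illustrative (hypothetical HLSL): a RWTexture2D<float> that is read maps to
		// component type F32 and gets ImageFormatR32f below; any component type outside
		// U32/I32/F32/U64 falls through to the default case and is only accepted when
		// the typed_uav_read_without_format option is set.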
		if (access_meta.has_read)
		{
			if (options.typed_uav_read_without_format && !access_meta.has_atomic)
			{
				builder().addCapability(spv::CapabilityStorageImageReadWithoutFormat);
				format = spv::ImageFormatUnknown;
			}
			else
			{
				switch (actual_component_type)
				{
				case DXIL::ComponentType::U32:
					format = spv::ImageFormatR32ui;
					break;

				case DXIL::ComponentType::I32:
					format = spv::ImageFormatR32i;
					break;

				case DXIL::ComponentType::F32:
					format = spv::ImageFormatR32f;
					break;

				case DXIL::ComponentType::U64:
					format = spv::ImageFormatR64ui;
					builder().addExtension("SPV_EXT_shader_image_int64");
					builder().addCapability(spv::CapabilityInt64ImageEXT);
					break;

				default:
					LOGE("Reading from UAV, but component type does not conform to U32, I32, F32 or U64. "
					     "typed_uav_read_without_format option must be enabled.\n");
					return false;
				}
			}
		}
	}

	return true;
}

bool Converter::Impl::emit_uavs(const llvm::MDNode *uavs, const llvm::MDNode *refl)
{
	auto &builder = spirv_module.get_builder();
	unsigned num_uavs = uavs->getNumOperands();
	for (unsigned i = 0; i < num_uavs; i++)
	{
		auto *uav = llvm::cast<llvm::MDNode>(uavs->getOperand(i));
		auto var_meta = get_resource_variable_meta(uav);
		if (!var_meta.is_active)
			continue;

		unsigned index = get_constant_metadata(uav, 0);
		auto name = get_resource_name_metadata(uav, refl);
		unsigned bind_space = get_constant_metadata(uav, 3);
		unsigned bind_register = get_constant_metadata(uav, 4);
		unsigned range_size = get_constant_metadata(uav, 5);

		if (bind_register == UINT32_MAX && bind_space == UINT32_MAX)
		{
			// This seems to be possible in RT shaders when explicit register() is missing?
			LOGE("Nonsensical UAV binding detected.\n");
			return false;
		}

		auto resource_kind = static_cast<DXIL::ResourceKind>(get_constant_metadata(uav, 6));

		// Magic resource that does not actually exist.
		if (index == ags.uav_magic_resource_type_index || index == nvapi.uav_magic_resource_type_index)
			continue;

		bool has_counter = get_constant_metadata(uav, 8) != 0;
		bool is_rov = get_constant_metadata(uav, 9) != 0;
		// ROV implies coherent in Vulkan memory models.
		bool globally_coherent = get_constant_metadata(uav, 7) != 0 || is_rov;

		llvm::MDNode *tags = nullptr;
		if (uav->getNumOperands() >= 11 && uav->getOperand(10))
			tags = llvm::dyn_cast<llvm::MDNode>(uav->getOperand(10));

		unsigned stride = 0;
		spv::ImageFormat format = spv::ImageFormatUnknown;
		auto actual_component_type = DXIL::ComponentType::U32;
		auto effective_component_type = actual_component_type;
		auto &access_meta = uav_access_tracking[index];

		if (globally_coherent)
			execution_mode_meta.declares_globallycoherent_uav = true;
		if (is_rov)
			execution_mode_meta.declares_rov = true;

		// We shouldn't need this, but dxilconv is broken.
		if (access_meta.has_counter)
			has_counter = true;

		// If the shader has device-memory memory barriers, we need to support this.
		// GLSL450 memory model does not do this for us by default.
		// coherent: memory variable where reads and writes are coherent with reads and
		// writes from other shader invocations.
		// We have two options:
		// - Slap Coherent on it.
		// - Use Vulkan memory model and make use of MakeVisibleKHR/MakeAvailableKHR flags in an OpMemoryBarrier.
		//   This would flush and invalidate any incoherent caches as necessary.
		// For now, slapping coherent on all UAVs is good enough.
		// When we move to full Vulkan memory model we can do a slightly better job.
		// If no UAV actually needs globallycoherent we can demote any barriers to workgroup barriers,
		// which is hopefully more optimal if the compiler understands the intent ...
		// Only promote resources which actually need some kind of coherence.
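		// Illustrative sketch of the two strategies in SPIR-V terms (not emitted here):
		//
		//   ; Option 1 (GLSL450 memory model): decorate the variable.
		//   OpDecorate %uav Coherent
		//
		//   ; Option 2 (Vulkan memory model): let the barrier do the work.
		//   OpMemoryBarrier %scope_device %semantics
		//   ; where %semantics includes MakeAvailableKHR | MakeVisibleKHR.
		//
		// %uav, %scope_device and %semantics are placeholder names.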
		if (shader_analysis.require_uav_thread_group_coherence && access_meta.has_written && access_meta.has_read &&
		    execution_mode_meta.memory_model == spv::MemoryModelGLSL450)
		{
			globally_coherent = true;
		}

		if (resource_kind == DXIL::ResourceKind::FeedbackTexture2D ||
		    resource_kind == DXIL::ResourceKind::FeedbackTexture2DArray)
		{
			// 64-bit atomics make things a bit nicer.
			actual_component_type = DXIL::ComponentType::U64;
			effective_component_type = get_effective_typed_resource_type(actual_component_type);
		}
		else if (tags && get_constant_metadata(tags, 0) == 0)
		{
			// Sampled format.
			actual_component_type =
			    normalize_component_type(static_cast<DXIL::ComponentType>(get_constant_metadata(tags, 1)));

			if (access_meta.has_atomic_64bit)
			{
				// The component type in DXIL is u32, even if the resource itself is u64 in meta reflection data ...
				// This is also the case for signed components. Always use R64UI here.
				actual_component_type = DXIL::ComponentType::U64;
			}

			effective_component_type = get_effective_typed_resource_type(actual_component_type);
		}
		else
		{
			// Structured/Raw buffers, just use uint for good measure, we'll bitcast as needed.
			// Field 1 is stride, but we don't care about that unless we will support an SSBO path.
			format = spv::ImageFormatR32ui;
			if (tags)
				stride = get_constant_metadata(tags, 1);
		}

		unsigned alignment = resource_kind == DXIL::ResourceKind::RawBuffer ? 16 : (stride & -int(stride));

		if (!get_uav_image_format(resource_kind, actual_component_type, access_meta, format))
			return false;

		DescriptorTableEntry local_table_entry = {};
		int local_root_signature_entry =
		    get_local_root_signature_entry(ResourceClass::UAV, bind_space, bind_register, local_table_entry);

		bool need_resource_remapping = local_root_signature_entry < 0 ||
		                               local_root_signature[local_root_signature_entry].type == LocalRootSignatureType::Table;

		D3DUAVBinding d3d_binding = {};
		d3d_binding.counter = has_counter;
		d3d_binding.binding = { get_remapping_stage(execution_model), resource_kind, index,
			                    bind_space, bind_register, range_size, alignment };
		VulkanUAVBinding vulkan_binding = { { bind_space, bind_register }, { bind_space + 1, bind_register }, {} };

		if (need_resource_remapping && resource_mapping_iface &&
		    !resource_mapping_iface->remap_uav(d3d_binding, vulkan_binding))
		{
			// We may be rejected if the unbound range has 1 non-bindless descriptor.
			bool retry = d3d_binding.binding.range_size == UINT32_MAX;
			if (retry)
			{
				d3d_binding.binding.range_size = 1;
				range_size = 1;
			}

			if (!retry || !resource_mapping_iface->remap_uav(d3d_binding, vulkan_binding))
			{
				LOGE("Failed to remap UAV %u:%u.\n", bind_space, bind_register);
				return false;
			}
		}

		AliasedAccess aliased_access;
		if (!analyze_aliased_access(access_meta,
		                            need_resource_remapping ? vulkan_binding.buffer_binding.descriptor_type :
		                                                      VulkanDescriptorType::BufferDeviceAddress,
		                            aliased_access))
		{
			return false;
		}

		uav_index_to_reference.resize(std::max(uav_index_to_reference.size(), size_t(index + 1)));
		uav_index_to_counter.resize(std::max(uav_index_to_counter.size(), size_t(index + 1)));
		uav_index_to_offset.resize(std::max(uav_index_to_offset.size(), size_t(index + 1)));

		if (!get_ssbo_offset_buffer_id(uav_index_to_offset[index], vulkan_binding.buffer_binding,
		                               vulkan_binding.offset_binding, resource_kind, alignment))
			return false;

		if (range_size != 1)
		{
			if (range_size == ~0u)
			{
				builder.addExtension("SPV_EXT_descriptor_indexing");
				builder.addCapability(spv::CapabilityRuntimeDescriptorArrayEXT);
			}

			if (has_counter)
			{
				builder.addExtension("SPV_EXT_descriptor_indexing");
				if (vulkan_binding.counter_binding.descriptor_type == VulkanDescriptorType::SSBO)
					builder.addCapability(spv::CapabilityStorageBufferArrayDynamicIndexing);
				else
					builder.addCapability(spv::CapabilityStorageTexelBufferArrayDynamicIndexingEXT);
			}

			if ((resource_kind == DXIL::ResourceKind::StructuredBuffer || resource_kind == DXIL::ResourceKind::RawBuffer) &&
			    vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::SSBO)
			{
				builder.addCapability(spv::CapabilityStorageBufferArrayDynamicIndexing);
			}
			else if (resource_kind == DXIL::ResourceKind::StructuredBuffer ||
			         resource_kind == DXIL::ResourceKind::RawBuffer ||
			         resource_kind == DXIL::ResourceKind::TypedBuffer)
			{
				builder.addExtension("SPV_EXT_descriptor_indexing");
				builder.addCapability(spv::CapabilityStorageTexelBufferArrayDynamicIndexingEXT);
			}
			else
				builder.addCapability(spv::CapabilityStorageImageArrayDynamicIndexing);
		}

		BindlessInfo bindless_info = {};
		bindless_info.type = DXIL::ResourceType::UAV;
		bindless_info.component = effective_component_type;
		bindless_info.kind = resource_kind;
		bindless_info.desc_set = vulkan_binding.buffer_binding.descriptor_set;
		bindless_info.binding = vulkan_binding.buffer_binding.binding;
		bindless_info.format = format;
		bindless_info.uav_read = access_meta.has_read;
		bindless_info.uav_written = access_meta.has_written;
		bindless_info.uav_coherent = globally_coherent;
		bindless_info.descriptor_type = vulkan_binding.buffer_binding.descriptor_type;
		bindless_info.relaxed_precision =
		    actual_component_type != effective_component_type && component_type_is_16bit(actual_component_type);
		bindless_info.debug.stride = stride;

		// If we emit two SSBOs which both access the same buffer, we must emit Aliased decoration to be safe.
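		// For example (hypothetical shader): if the same u# register is accessed both as
		// uint and as uint64_t, analyze_aliased_access() produces two raw declarations,
		// which end up roughly like this (GLSL syntax purely for illustration):
		//
		//   layout(set = 0, binding = 0) buffer SSBO_u32 { uint     elems[]; };
		//   layout(set = 0, binding = 0) buffer SSBO_u64 { uint64_t elems[]; };
		//
		// Both variables target the same descriptor, hence the Aliased decoration.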
		bindless_info.aliased = aliased_access.requires_alias_decoration;

		BindlessInfo counter_info = {};
		counter_info.type = DXIL::ResourceType::UAV;
		counter_info.component = DXIL::ComponentType::U32;
		counter_info.desc_set = vulkan_binding.counter_binding.descriptor_set;
		counter_info.binding = vulkan_binding.counter_binding.binding;

		if (vulkan_binding.counter_binding.descriptor_type == VulkanDescriptorType::SSBO)
		{
			counter_info.kind = DXIL::ResourceKind::RawBuffer;
			counter_info.counters = true;
		}
		else if (options.physical_storage_buffer &&
		         vulkan_binding.counter_binding.descriptor_type != VulkanDescriptorType::TexelBuffer)
		{
			counter_info.kind = DXIL::ResourceKind::Invalid;
			counter_info.counters = true;
		}
		else
		{
			counter_info.kind = DXIL::ResourceKind::TypedBuffer;
			counter_info.uav_read = true;
			counter_info.uav_written = true;
			counter_info.uav_coherent = globally_coherent;
			counter_info.format = spv::ImageFormatR32ui;
		}

		ReferenceVkMemoryModel vkmm = {};
		if (execution_mode_meta.memory_model == spv::MemoryModelVulkan)
		{
			// For UAV we just slap it on everything.
			vkmm.non_private = true;
			vkmm.auto_visibility = globally_coherent || is_rov;
		}

		if (local_root_signature_entry >= 0)
		{
			auto &entry = local_root_signature[local_root_signature_entry];
			if (entry.type == LocalRootSignatureType::Table)
			{
				if (!vulkan_binding.buffer_binding.bindless.use_heap)
				{
					LOGE("Table SBT entries must be bindless.\n");
					return false;
				}

				if (!var_meta.is_lib_variable)
				{
					LOGE("Local root signature requires global lib variables.\n");
					return false;
				}

				uint32_t heap_offset = local_table_entry.offset_in_heap;
				heap_offset += bind_register - local_table_entry.register_index;

				auto &ref = uav_index_to_reference[index];

				if (aliased_access.requires_alias_decoration)
				{
					ref.var_alias_group =
					    create_bindless_heap_variable_alias_group(bindless_info, aliased_access.raw_declarations);
				}
				else if (aliased_access.override_primary_component_types)
				{
					auto tmp_info = bindless_info;
					tmp_info.component = aliased_access.primary_component_type;
					tmp_info.raw_vecsize = aliased_access.primary_raw_vecsize;
					ref.var_id = create_bindless_heap_variable(tmp_info);
				}
				else
				{
					ref.var_id = create_bindless_heap_variable(bindless_info);
				}

				ref.aliased = aliased_access.requires_alias_decoration;
				ref.base_offset = heap_offset;
				ref.stride = stride;
				ref.bindless = true;
				ref.base_resource_is_array = range_size != 1;
				ref.local_root_signature_entry = local_root_signature_entry;
				ref.resource_kind = resource_kind;
				ref.vkmm = vkmm;

				if (has_counter)
				{
					if (!vulkan_binding.counter_binding.bindless.use_heap)
					{
						LOGE("Table SBT entries must be bindless.\n");
						return false;
					}

					heap_offset = local_table_entry.offset_in_heap;
					heap_offset += bind_register - local_table_entry.register_index;

					spv::Id counter_var_id = create_bindless_heap_variable(counter_info);

					auto &counter_ref = uav_index_to_counter[index];
					counter_ref.var_id = counter_var_id;
					counter_ref.base_offset = heap_offset;
					counter_ref.stride = 4;
					counter_ref.bindless = true;
					counter_ref.base_resource_is_array = range_size != 1;
					counter_ref.local_root_signature_entry = local_root_signature_entry;

					// Signals the underlying type of the counter buffer.
					counter_ref.resource_kind =
					    counter_info.counters ? DXIL::ResourceKind::RawBuffer : DXIL::ResourceKind::TypedBuffer;
				}
			}
			else
			{
				// Otherwise, we simply refer to the SBT directly to obtain a pointer.
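				// The SBT member for a root descriptor holds a raw GPU VA stored as u32x2
				// (see emit_shader_record_buffer_block_type() below); at access time it is
				// bitcast to a PhysicalStorageBuffer pointer rather than going through a
				// descriptor.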
				if (resource_kind != DXIL::ResourceKind::RawBuffer &&
				    resource_kind != DXIL::ResourceKind::StructuredBuffer)
				{
					LOGE("UAV SBT root descriptors must be raw buffers or structured buffers.\n");
					return false;
				}

				auto &ref = uav_index_to_reference[index];
				ref.var_id = shader_record_buffer_id;
				ref.stride = stride;
				ref.local_root_signature_entry = local_root_signature_entry;
				ref.resource_kind = resource_kind;
				ref.vkmm = vkmm;

				if (range_size != 1)
				{
					LOGE("Cannot use descriptor array for root descriptors.\n");
					return false;
				}
			}
		}
		else if (vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::BufferDeviceAddress)
		{
			if (resource_kind != DXIL::ResourceKind::RawBuffer && resource_kind != DXIL::ResourceKind::StructuredBuffer)
			{
				LOGE("BDA root descriptors must be raw buffers or structured buffers.\n");
				return false;
			}

			auto &ref = uav_index_to_reference[index];
			ref.var_id = root_constant_id;
			ref.root_descriptor = true;
			ref.push_constant_member = vulkan_binding.buffer_binding.root_constant_index;
			ref.coherent = globally_coherent;
			ref.rov = is_rov;
			ref.stride = stride;
			ref.resource_kind = resource_kind;
			ref.vkmm = vkmm;

			if (options.extended_non_semantic_info)
				emit_root_parameter_index_from_push_index("UAV", ref.push_constant_member, 8, true);

			if (range_size != 1)
			{
				LOGE("Cannot use descriptor array for root descriptors.\n");
				return false;
			}
		}
		else if (vulkan_binding.buffer_binding.bindless.use_heap)
		{
			// DXIL already applies the t# register offset to any dynamic index, so counteract that here.
			// The exception is with lib_* where we access resources by variable, not through
			// createResource() >_____<.
			uint32_t heap_offset = vulkan_binding.buffer_binding.bindless.heap_root_offset;
			if (range_size != 1 && !var_meta.is_lib_variable)
				heap_offset -= bind_register;

			auto &ref = uav_index_to_reference[index];

			if (aliased_access.requires_alias_decoration)
			{
				ref.var_alias_group =
				    create_bindless_heap_variable_alias_group(bindless_info, aliased_access.raw_declarations);
			}
			else if (aliased_access.override_primary_component_types)
			{
				auto tmp_info = bindless_info;
				tmp_info.component = aliased_access.primary_component_type;
				tmp_info.raw_vecsize = aliased_access.primary_raw_vecsize;
				ref.var_id = create_bindless_heap_variable(tmp_info);
			}
			else
			{
				ref.var_id = create_bindless_heap_variable(bindless_info);
			}

			ref.aliased = aliased_access.requires_alias_decoration;
			ref.push_constant_member = vulkan_binding.buffer_binding.root_constant_index + root_descriptor_count;
			ref.base_offset = heap_offset;
			ref.stride = stride;
			ref.bindless = true;
			ref.coherent = globally_coherent;
			ref.rov = is_rov;
			ref.base_resource_is_array = range_size != 1;
			ref.resource_kind = resource_kind;
			ref.vkmm = vkmm;

			if (options.extended_non_semantic_info)
			{
				emit_root_parameter_index_from_push_index("ResourceTable",
				                                          vulkan_binding.buffer_binding.root_constant_index, 4, false);
			}

			if (has_counter)
			{
				if (vulkan_binding.counter_binding.bindless.use_heap)
				{
					spv::Id counter_var_id = create_bindless_heap_variable(counter_info);
					heap_offset = vulkan_binding.counter_binding.bindless.heap_root_offset;
					if (range_size != 1 && !var_meta.is_lib_variable)
						heap_offset -= bind_register;

					auto &counter_ref = uav_index_to_counter[index];
					counter_ref.var_id = counter_var_id;
					counter_ref.push_constant_member =
					    vulkan_binding.counter_binding.root_constant_index + root_descriptor_count;
					counter_ref.base_offset = heap_offset;
					counter_ref.stride = 4;
					counter_ref.bindless = true;
					counter_ref.base_resource_is_array = range_size != 1;

					// Signals the underlying type of the counter buffer.
					counter_ref.resource_kind = counter_info.kind;
				}
				else
				{
					LOGE("If base UAV uses bindless heap, UAV counter must also do so.\n");
					return false;
				}
			}
		}
		else
		{
			spv::Id var_id = 0;
			Vector<RawDeclarationVariable> var_alias_group;
			spv::StorageClass storage;

			if (vulkan_binding.buffer_binding.descriptor_type == VulkanDescriptorType::SSBO)
			{
				// TODO: Consider implementing aliased buffers which all refer to the same buffer,
				// but which can exploit alignment per-instruction.
				// This is impractical, since BufferLoad/Store in DXIL does not have alignment (4 bytes is assumed),
				// so just unroll.
				// To make good use of this, we'll need apps to use SM 6.2 RawBufferLoad/Store, which does have explicit alignment.
				// We'll likely need to mess around with Aliased decoration as well, which might have other effects ...
				storage = spv::StorageClassStorageBuffer;

				if (aliased_access.requires_alias_decoration)
					var_alias_group = create_raw_ssbo_variable_alias_group(aliased_access.raw_declarations, range_size, name);
				else
				{
					assert(aliased_access.raw_declarations.size() == 1);
					var_id = create_raw_ssbo_variable(aliased_access.raw_declarations.front(), range_size, name);
				}
			}
			else
			{
				// Treat default as texel buffer, as it's the more compatible way of implementing buffer types in DXIL.
				auto element_type_id = get_type_id(effective_component_type, 1, 1);
				spv::Id type_id = builder.makeImageType(element_type_id, image_dimension_from_resource_kind(resource_kind),
				                                        false, image_dimension_is_arrayed(resource_kind),
				                                        image_dimension_is_multisampled(resource_kind), 2, format);

				if (range_size != 1)
				{
					if (range_size == ~0u)
						type_id = builder.makeRuntimeArray(type_id);
					else
						type_id = builder.makeArrayType(type_id, builder.makeUintConstant(range_size), 0);
				}

				storage = spv::StorageClassUniformConstant;
				var_id = create_variable(storage, type_id, name.empty() ? nullptr : name.c_str());

				if (actual_component_type != effective_component_type && component_type_is_16bit(actual_component_type))
					builder.addDecoration(var_id, spv::DecorationRelaxedPrecision);
			}

			auto &ref = uav_index_to_reference[index];
			ref.var_id = var_id;
			ref.var_alias_group = std::move(var_alias_group);
			ref.aliased = aliased_access.requires_alias_decoration;
			ref.stride = stride;
			ref.coherent = globally_coherent;
			ref.rov = is_rov;
			ref.base_resource_is_array = range_size != 1;
			ref.resource_kind = resource_kind;
			ref.vkmm = vkmm;

			// Counteract any offsets.
			if (range_size != 1 && !var_meta.is_lib_variable)
				ref.base_offset = -bind_register;

			const auto decorate_variable = [&](spv::Id id) {
				builder.addDecoration(id, spv::DecorationDescriptorSet, vulkan_binding.buffer_binding.descriptor_set);
				builder.addDecoration(id, spv::DecorationBinding, vulkan_binding.buffer_binding.binding);

				if (!access_meta.has_read)
					builder.addDecoration(id, spv::DecorationNonReadable);
				if (!access_meta.has_written)
					builder.addDecoration(id, spv::DecorationNonWritable);
				if (globally_coherent && execution_mode_meta.memory_model == spv::MemoryModelGLSL450)
					builder.addDecoration(id, spv::DecorationCoherent);
				if (aliased_access.requires_alias_decoration)
					builder.addDecoration(id, spv::DecorationAliased);
			};

			if (var_id)
				decorate_variable(var_id);
			for (auto &var : ref.var_alias_group)
				decorate_variable(var.var_id);

			spv::Id counter_var_id = 0;
			if (has_counter)
			{
				if (vulkan_binding.counter_binding.bindless.use_heap)
				{
					LOGE("Cannot use bindless UAV counters along with non-bindless UAVs.\n");
					return false;
				}

				spv::StorageClass counter_storage;
				spv::Id type_id;

				if (vulkan_binding.counter_binding.descriptor_type == VulkanDescriptorType::SSBO)
				{
					spv::Id uint_type = builder.makeUintType(32);
					type_id = builder.makeStructType({ uint_type }, "AtomicCounterSSBO");
					builder.addDecoration(type_id, spv::DecorationBlock);
					builder.addMemberName(type_id, 0, "counter");
					builder.addMemberDecoration(type_id, 0, spv::DecorationOffset, 0);
					counter_storage = spv::StorageClassStorageBuffer;
				}
				else
				{
					// Treat default as texel buffer, as it's the more compatible way of implementing buffer types in DXIL.
					auto element_type_id = get_type_id(DXIL::ComponentType::U32, 1, 1);
					type_id = builder.makeImageType(element_type_id, spv::DimBuffer, false, false, false, 2, format);
					counter_storage = spv::StorageClassUniformConstant;
				}

				if (range_size != 1)
				{
					if (range_size == ~0u)
						type_id = builder.makeRuntimeArray(type_id);
					else
						type_id = builder.makeArrayType(type_id, builder.makeUintConstant(range_size), 0);
				}

				counter_var_id = create_variable(counter_storage, type_id,
				                                 name.empty() ? nullptr : (name + "Counter").c_str());

				builder.addDecoration(counter_var_id, spv::DecorationDescriptorSet,
				                      vulkan_binding.counter_binding.descriptor_set);
				builder.addDecoration(counter_var_id, spv::DecorationBinding, vulkan_binding.counter_binding.binding);

				auto &counter_ref = uav_index_to_counter[index];
				counter_ref.var_id = counter_var_id;
				counter_ref.stride = 4;
				counter_ref.base_resource_is_array = range_size != 1;
				counter_ref.resource_kind = counter_storage == spv::StorageClassStorageBuffer ?
				                            DXIL::ResourceKind::RawBuffer : DXIL::ResourceKind::TypedBuffer;
			}

			if (var_id)
			{
				auto &meta = handle_to_resource_meta[var_id];
				meta = {};
				meta.kind = resource_kind;
				meta.stride = stride;
				meta.var_id = var_id;
				meta.storage = storage;
				meta.component_type = actual_component_type;
				meta.vkmm = vkmm;

				if (aliased_access.override_primary_component_types)
				{
					meta.component_type = aliased_access.primary_component_type;
					meta.raw_component_vecsize = aliased_access.primary_raw_vecsize;
				}
			}

			for (auto &var : ref.var_alias_group)
			{
				auto &meta = handle_to_resource_meta[var.var_id];
				meta = {};
				meta.kind = resource_kind;
				meta.stride = stride;
				meta.var_id = var.var_id;
				meta.storage = storage;
				meta.component_type = raw_width_to_component_type(var.declaration.type, var.declaration.width);
				meta.raw_component_vecsize = var.declaration.vecsize;
				meta.vkmm = vkmm;
			}
		}
	}

	return true;
}

bool Converter::Impl::emit_cbvs(const llvm::MDNode *cbvs, const llvm::MDNode *refl)
{
	auto &builder = spirv_module.get_builder();
	unsigned num_cbvs = cbvs->getNumOperands();
	for (unsigned i = 0; i < num_cbvs; i++)
	{
		auto *cbv = llvm::cast<llvm::MDNode>(cbvs->getOperand(i));
		auto var_meta = get_resource_variable_meta(cbv);
		if (!var_meta.is_active)
			continue;

		unsigned index = get_constant_metadata(cbv, 0);
		auto name = get_resource_name_metadata(cbv, refl);
		unsigned bind_space = get_constant_metadata(cbv, 3);
		unsigned bind_register = get_constant_metadata(cbv, 4);
		unsigned range_size = get_constant_metadata(cbv, 5);
		unsigned cbv_size = get_constant_metadata(cbv, 6);

		if (bind_register == UINT32_MAX && bind_space == UINT32_MAX)
		{
			// This seems to be possible in RT shaders when explicit register() is missing?
			LOGE("Nonsensical CBV binding detected.\n");
			return false;
		}

		DescriptorTableEntry local_table_entry = {};
		int local_root_signature_entry =
		    get_local_root_signature_entry(ResourceClass::CBV, bind_space, bind_register, local_table_entry);

		bool need_resource_remapping = local_root_signature_entry < 0 ||
		                               local_root_signature[local_root_signature_entry].type == LocalRootSignatureType::Table;

		D3DBinding d3d_binding = { get_remapping_stage(execution_model), DXIL::ResourceKind::CBuffer,
			                       index, bind_space, bind_register, range_size, 0 };
		VulkanCBVBinding vulkan_binding = {};
		vulkan_binding.buffer = { bind_space, bind_register };

		if (need_resource_remapping && resource_mapping_iface &&
		    !resource_mapping_iface->remap_cbv(d3d_binding, vulkan_binding))
		{
			// We may be rejected if the unbound range has 1 non-bindless descriptor.
			bool retry = d3d_binding.range_size == UINT32_MAX;
			if (retry)
			{
				d3d_binding.range_size = 1;
				range_size = 1;
			}

			if (!retry || !resource_mapping_iface->remap_cbv(d3d_binding, vulkan_binding))
			{
				LOGE("Failed to remap CBV %u:%u.\n", bind_space, bind_register);
				return false;
			}
		}

		auto &access_meta = cbv_access_tracking[index];

		AliasedAccess aliased_access;
		if (!analyze_aliased_access(access_meta, VulkanDescriptorType::UBO, aliased_access))
			return false;

		cbv_index_to_reference.resize(std::max(cbv_index_to_reference.size(), size_t(index + 1)));

		if (range_size != 1)
		{
			if (range_size == ~0u)
			{
				builder.addExtension("SPV_EXT_descriptor_indexing");
				builder.addCapability(spv::CapabilityRuntimeDescriptorArrayEXT);
			}

			if (vulkan_binding.buffer.bindless.use_heap && options.bindless_cbv_ssbo_emulation)
				builder.addCapability(spv::CapabilityStorageBufferArrayDynamicIndexing);
			else
				builder.addCapability(spv::CapabilityUniformBufferArrayDynamicIndexing);
		}

		BindlessInfo bindless_info = {};
		bindless_info.type = DXIL::ResourceType::CBV;
		bindless_info.kind = DXIL::ResourceKind::CBuffer;
		bindless_info.desc_set = vulkan_binding.buffer.descriptor_set;
		bindless_info.binding = vulkan_binding.buffer.binding;
		bindless_info.component = aliased_access.primary_component_type;
		bindless_info.raw_vecsize = aliased_access.primary_raw_vecsize;

		if (local_root_signature_entry >= 0)
		{
			auto &entry = local_root_signature[local_root_signature_entry];
			if (entry.type == LocalRootSignatureType::Table)
			{
				if (!vulkan_binding.buffer.bindless.use_heap)
				{
					LOGE("Table SBT entries must be bindless.\n");
					return false;
				}

				uint32_t heap_offset = local_table_entry.offset_in_heap;
				heap_offset += bind_register - local_table_entry.register_index;

				if (!var_meta.is_lib_variable)
				{
					LOGE("Local root signature requires global lib variables.\n");
					return false;
				}

				auto &ref = cbv_index_to_reference[index];

				if (aliased_access.requires_alias_decoration)
				{
					ref.var_alias_group =
					    create_bindless_heap_variable_alias_group(bindless_info, aliased_access.raw_declarations);
				}
				else
				{
					ref.var_id = create_bindless_heap_variable(bindless_info);
				}

				ref.base_offset = heap_offset;
				ref.base_resource_is_array = range_size != 1;
				ref.bindless = true;
				ref.local_root_signature_entry = local_root_signature_entry;
				ref.resource_kind = DXIL::ResourceKind::CBuffer;
			}
			else
			{
				auto &ref = cbv_index_to_reference[index];
				ref.var_id = shader_record_buffer_id;
				ref.local_root_signature_entry = local_root_signature_entry;
				ref.resource_kind = DXIL::ResourceKind::CBuffer;

				if (range_size != 1)
				{
					LOGE("Cannot use descriptor array for root descriptors.\n");
					return false;
				}
			}
		}
		else if (vulkan_binding.push_constant)
		{
			if (root_constant_id == 0)
			{
				LOGE("Must have setup push constant block to use root constant path.\n");
				return false;
			}

			auto &ref = cbv_index_to_reference[index];
			ref.var_id = root_constant_id;
			ref.push_constant_member = vulkan_binding.push.offset_in_words + root_descriptor_count;
			ref.resource_kind = DXIL::ResourceKind::CBuffer;

			if (options.extended_non_semantic_info)
				emit_root_parameter_index_from_push_index("Constant", vulkan_binding.push.offset_in_words, cbv_size, false);
		}
		else if (vulkan_binding.buffer.descriptor_type == VulkanDescriptorType::BufferDeviceAddress)
		{
			auto &ref = cbv_index_to_reference[index];
			ref.var_id = root_constant_id;
			ref.root_descriptor = true;
			ref.push_constant_member = vulkan_binding.buffer.root_constant_index;
			ref.resource_kind = DXIL::ResourceKind::CBuffer;

			if (options.extended_non_semantic_info)
				emit_root_parameter_index_from_push_index("CBV", ref.push_constant_member, 8, true);

			if (range_size != 1)
			{
				LOGE("Cannot use descriptor array for root descriptors.\n");
				return false;
			}
		}
		else if (vulkan_binding.buffer.bindless.use_heap)
		{
			// DXIL already applies the t# register offset to any dynamic index, so counteract that here.
			// The exception is with lib_* where we access resources by variable, not through
			// createResource() >_____<.
			uint32_t heap_offset = vulkan_binding.buffer.bindless.heap_root_offset;
			if (range_size != 1 && !var_meta.is_lib_variable)
				heap_offset -= bind_register;

			auto &ref = cbv_index_to_reference[index];

			if (aliased_access.requires_alias_decoration)
			{
				ref.var_alias_group =
				    create_bindless_heap_variable_alias_group(bindless_info, aliased_access.raw_declarations);
			}
			else
			{
				ref.var_id = create_bindless_heap_variable(bindless_info);
			}

			ref.push_constant_member = vulkan_binding.buffer.root_constant_index + root_descriptor_count;
			ref.base_offset = heap_offset;
			ref.base_resource_is_array = range_size != 1;
			ref.bindless = true;
			ref.resource_kind = DXIL::ResourceKind::CBuffer;

			if (options.extended_non_semantic_info)
			{
				emit_root_parameter_index_from_push_index("ResourceTable",
				                                          vulkan_binding.buffer.root_constant_index, 4, false);
			}
		}
		else
		{
			auto &ref = cbv_index_to_reference[index];

			if (aliased_access.requires_alias_decoration)
			{
				ref.var_alias_group =
				    create_ubo_variable_alias_group(aliased_access.raw_declarations, range_size, name, cbv_size);
			}
			else
			{
				assert(aliased_access.raw_declarations.size() == 1);
				ref.var_id = create_ubo_variable(aliased_access.raw_declarations.front(), range_size, name, cbv_size);
			}

			ref.base_resource_is_array = range_size != 1;
			ref.resource_kind = DXIL::ResourceKind::CBuffer;

			// Counteract any offsets.
			if (range_size != 1 && !var_meta.is_lib_variable)
				ref.base_offset = -bind_register;

			if (ref.var_id)
			{
				auto &meta = handle_to_resource_meta[ref.var_id];
				meta = {};
				meta.kind = ref.resource_kind;
				meta.var_id = ref.var_id;
				meta.storage = spv::StorageClassUniform;
				meta.component_type = aliased_access.primary_component_type;
				meta.raw_component_vecsize = aliased_access.primary_raw_vecsize;

				builder.addDecoration(meta.var_id, spv::DecorationDescriptorSet, vulkan_binding.buffer.descriptor_set);
				builder.addDecoration(meta.var_id, spv::DecorationBinding, vulkan_binding.buffer.binding);
			}

			for (auto &var : ref.var_alias_group)
			{
				auto &meta = handle_to_resource_meta[var.var_id];
				meta = {};
				meta.kind = ref.resource_kind;
				meta.var_id = var.var_id;
				meta.storage = spv::StorageClassUniform;
				meta.component_type = raw_width_to_component_type(var.declaration.type, var.declaration.width);
				meta.raw_component_vecsize = var.declaration.vecsize;

				builder.addDecoration(meta.var_id, spv::DecorationDescriptorSet, vulkan_binding.buffer.descriptor_set);
				builder.addDecoration(meta.var_id, spv::DecorationBinding, vulkan_binding.buffer.binding);
			}

			if (options.extended_non_semantic_info)
			{
				emit_root_parameter_index_from_push_index(
				    "PushCBV",
				    Converter::pack_desc_set_binding_to_virtual_offset(vulkan_binding.buffer.descriptor_set,
				                                                       vulkan_binding.buffer.binding),
				    0, false);
			}
		}
	}

	return true;
}

bool Converter::Impl::emit_samplers(const llvm::MDNode *samplers, const llvm::MDNode *refl)
{
	auto &builder = spirv_module.get_builder();
	unsigned num_samplers = samplers->getNumOperands();
	for (unsigned i = 0; i < num_samplers; i++)
	{
		auto *sampler = llvm::cast<llvm::MDNode>(samplers->getOperand(i));
		auto var_meta = get_resource_variable_meta(sampler);
		if (!var_meta.is_active)
			continue;

		unsigned index = get_constant_metadata(sampler, 0);
		auto name = get_resource_name_metadata(sampler, refl);
		unsigned bind_space = get_constant_metadata(sampler, 3);
		unsigned bind_register = get_constant_metadata(sampler, 4);
		unsigned range_size = get_constant_metadata(sampler, 5);

		if (bind_register == UINT32_MAX && bind_space == UINT32_MAX)
		{
			// This seems to be possible in RT shaders when explicit register() is missing?
			LOGE("Nonsensical Sampler binding detected.\n");
			return false;
		}

		if (range_size != 1)
		{
			if (range_size == ~0u)
			{
				builder.addExtension("SPV_EXT_descriptor_indexing");
				builder.addCapability(spv::CapabilityRuntimeDescriptorArrayEXT);
			}

			// This capability also covers samplers.
			builder.addCapability(spv::CapabilitySampledImageArrayDynamicIndexing);
		}

		DescriptorTableEntry local_table_entry = {};
		int local_root_signature_entry =
		    get_local_root_signature_entry(ResourceClass::Sampler, bind_space, bind_register, local_table_entry);

		bool need_resource_remapping = local_root_signature_entry < 0 ||
		                               local_root_signature[local_root_signature_entry].type == LocalRootSignatureType::Table;

		D3DBinding d3d_binding = { get_remapping_stage(execution_model), DXIL::ResourceKind::Sampler,
			                       index, bind_space, bind_register, range_size, 0 };
		VulkanBinding vulkan_binding = { bind_space, bind_register };

		if (need_resource_remapping && resource_mapping_iface &&
		    !resource_mapping_iface->remap_sampler(d3d_binding, vulkan_binding))
		{
			// We may be rejected if the unbound range has 1 non-bindless descriptor.
			bool retry = d3d_binding.range_size == UINT32_MAX;
			if (retry)
			{
				d3d_binding.range_size = 1;
				range_size = 1;
			}

			if (!retry || !resource_mapping_iface->remap_sampler(d3d_binding, vulkan_binding))
			{
				LOGE("Failed to remap sampler %u:%u.\n", bind_space, bind_register);
				return false;
			}
		}

		sampler_index_to_reference.resize(std::max(sampler_index_to_reference.size(), size_t(index + 1)));

		BindlessInfo bindless_info = {};
		bindless_info.type = DXIL::ResourceType::Sampler;
		bindless_info.kind = DXIL::ResourceKind::Sampler;
		bindless_info.desc_set = vulkan_binding.descriptor_set;
		bindless_info.binding = vulkan_binding.binding;

		if (local_root_signature_entry >= 0)
		{
			// Samplers can only live in table entries.
			if (!vulkan_binding.bindless.use_heap)
			{
				LOGE("Table SBT entries must be bindless.\n");
				return false;
			}

			spv::Id var_id = create_bindless_heap_variable(bindless_info);

			uint32_t heap_offset = local_table_entry.offset_in_heap;
			heap_offset += bind_register - local_table_entry.register_index;

			if (!var_meta.is_lib_variable)
			{
				LOGE("Local root signature requires global lib variables.\n");
				return false;
			}

			auto &ref = sampler_index_to_reference[index];
			ref.var_id = var_id;
			ref.base_offset = heap_offset;
			ref.bindless = true;
			ref.local_root_signature_entry = local_root_signature_entry;
			ref.base_resource_is_array = range_size != 1;
			ref.resource_kind = DXIL::ResourceKind::Sampler;
		}
		else if (vulkan_binding.bindless.use_heap)
		{
			spv::Id var_id = create_bindless_heap_variable(bindless_info);

			// DXIL already applies the t# register offset to any dynamic index, so counteract that here.
			// The exception is with lib_* where we access resources by variable, not through
			// createResource() >_____<.
			uint32_t heap_offset = vulkan_binding.bindless.heap_root_offset;
			if (range_size != 1 && !var_meta.is_lib_variable)
				heap_offset -= bind_register;

			auto &ref = sampler_index_to_reference[index];
			ref.var_id = var_id;
			ref.push_constant_member = vulkan_binding.root_constant_index + root_descriptor_count;
			ref.base_offset = heap_offset;
			ref.bindless = true;
			ref.base_resource_is_array = range_size != 1;
			ref.resource_kind = DXIL::ResourceKind::Sampler;

			if (options.extended_non_semantic_info)
				emit_root_parameter_index_from_push_index("SamplerTable", vulkan_binding.root_constant_index, 4, false);
		}
		else
		{
			spv::Id type_id = builder.makeSamplerType();

			if (range_size != 1)
			{
				if (range_size == ~0u)
					type_id = builder.makeRuntimeArray(type_id);
				else
					type_id = builder.makeArrayType(type_id, builder.makeUintConstant(range_size), 0);
			}

			spv::Id var_id = create_variable(spv::StorageClassUniformConstant, type_id,
			                                 name.empty() ? nullptr : name.c_str());

			builder.addDecoration(var_id, spv::DecorationDescriptorSet, vulkan_binding.descriptor_set);
			builder.addDecoration(var_id, spv::DecorationBinding, vulkan_binding.binding);

			auto &ref = sampler_index_to_reference[index];
			ref.var_id = var_id;
			ref.base_resource_is_array = range_size != 1;
			ref.resource_kind = DXIL::ResourceKind::Sampler;

			// Counteract any offsets.
			if (range_size != 1 && !var_meta.is_lib_variable)
				ref.base_offset = -bind_register;
		}
	}

	return true;
}

bool Converter::Impl::scan_srvs(ResourceRemappingInterface *iface, const llvm::MDNode *srvs, ShaderStage stage)
{
	unsigned num_srvs = srvs->getNumOperands();
	for (unsigned i = 0; i < num_srvs; i++)
	{
		auto *srv = llvm::cast<llvm::MDNode>(srvs->getOperand(i));
		unsigned index = get_constant_metadata(srv, 0);
		unsigned bind_space = get_constant_metadata(srv, 3);
		unsigned bind_register = get_constant_metadata(srv, 4);
		unsigned range_size = get_constant_metadata(srv, 5);
		auto resource_kind = static_cast<DXIL::ResourceKind>(get_constant_metadata(srv, 6));

		D3DBinding d3d_binding = { stage, resource_kind, index, bind_space, bind_register, range_size };
		VulkanSRVBinding vulkan_binding = {};
		if (iface && !iface->remap_srv(d3d_binding, vulkan_binding))
			return false;
	}

	return true;
}

bool Converter::Impl::scan_samplers(ResourceRemappingInterface *iface, const llvm::MDNode *samplers, ShaderStage stage)
{
	unsigned num_samplers = samplers->getNumOperands();
	for (unsigned i = 0; i < num_samplers; i++)
	{
		auto *sampler = llvm::cast<llvm::MDNode>(samplers->getOperand(i));
		unsigned index = get_constant_metadata(sampler, 0);
		unsigned bind_space = get_constant_metadata(sampler, 3);
		unsigned bind_register = get_constant_metadata(sampler, 4);
		unsigned range_size = get_constant_metadata(sampler, 5);

		D3DBinding d3d_binding = { stage, DXIL::ResourceKind::Sampler, index, bind_space, bind_register, range_size };
		VulkanBinding vulkan_binding = {};
		if (iface && !iface->remap_sampler(d3d_binding, vulkan_binding))
			return false;
	}

	return true;
}

bool Converter::Impl::scan_cbvs(ResourceRemappingInterface *iface, const llvm::MDNode *cbvs, ShaderStage stage)
{
	unsigned num_cbvs = cbvs->getNumOperands();
	for (unsigned i = 0; i < num_cbvs; i++)
	{
		auto *cbv = llvm::cast<llvm::MDNode>(cbvs->getOperand(i));
		unsigned index = get_constant_metadata(cbv, 0);
		unsigned bind_space = get_constant_metadata(cbv, 3);
		unsigned bind_register = get_constant_metadata(cbv, 4);
		unsigned range_size = get_constant_metadata(cbv, 5);

		D3DBinding d3d_binding = { stage, DXIL::ResourceKind::CBuffer, index, bind_space, bind_register, range_size };
		VulkanCBVBinding vulkan_binding = {};
		if (iface && !iface->remap_cbv(d3d_binding, vulkan_binding))
			return false;
	}

	return true;
}

bool Converter::Impl::scan_uavs(ResourceRemappingInterface *iface, const llvm::MDNode *uavs, ShaderStage stage)
{
	unsigned num_uavs = uavs->getNumOperands();
	for (unsigned i = 0; i < num_uavs; i++)
	{
		auto *uav = llvm::cast<llvm::MDNode>(uavs->getOperand(i));
		unsigned index = get_constant_metadata(uav, 0);
		unsigned bind_space = get_constant_metadata(uav, 3);
		unsigned bind_register = get_constant_metadata(uav, 4);
		unsigned range_size = get_constant_metadata(uav, 5);
		auto resource_kind = static_cast<DXIL::ResourceKind>(get_constant_metadata(uav, 6));
		bool has_counter = get_constant_metadata(uav, 8) != 0;

		D3DUAVBinding d3d_binding = { { stage, resource_kind, index, bind_space, bind_register, range_size },
			                          has_counter };
		VulkanUAVBinding vulkan_binding = {};
		if (iface && !iface->remap_uav(d3d_binding, vulkan_binding))
			return false;
	}

	return true;
}

bool Converter::Impl::require_arrayed_root_constants() const
{
	if (!resource_mapping_iface)
		return false;

	auto &module = bitcode_parser.get_module();
	auto *resource_meta = module.getNamedMetadata("dx.resources");
	if (!resource_meta)
		return false;

	auto *metas = resource_meta->getOperand(0);
	if (!metas->getOperand(2))
		return false;

	auto *cbvs = llvm::dyn_cast<llvm::MDNode>(metas->getOperand(2));
	if (!cbvs)
		return false;

	unsigned num_cbvs = cbvs->getNumOperands();
	for (unsigned i = 0; i < num_cbvs; i++)
	{
		auto *cbv = llvm::cast<llvm::MDNode>(cbvs->getOperand(i));
		auto var_meta = get_resource_variable_meta(cbv);
		if (!var_meta.is_active)
			continue;

		unsigned index = get_constant_metadata(cbv, 0);
		auto itr = cbv_access_tracking.find(index);
		if (itr == cbv_access_tracking.end())
			continue;

		unsigned bind_space = get_constant_metadata(cbv, 3);
		unsigned bind_register = get_constant_metadata(cbv, 4);

		DescriptorTableEntry local_table_entry = {};
		int local_root_signature_entry =
		    get_local_root_signature_entry(ResourceClass::CBV, bind_space, bind_register, local_table_entry);
		if (local_root_signature_entry >= 0)
			continue;

		D3DBinding d3d_binding = { get_remapping_stage(execution_model), DXIL::ResourceKind::CBuffer,
			                       index, bind_space, bind_register, UINT32_MAX, 0 };
		VulkanCBVBinding vulkan_binding = {};
		vulkan_binding.buffer = { bind_space, bind_register };
		if (!resource_mapping_iface->remap_cbv(d3d_binding, vulkan_binding))
			continue;
		if (!vulkan_binding.push_constant)
			continue;

		if (itr->second.dynamically_indexed_cbv)
			return true;
	}

	return false;
}

void Converter::Impl::emit_root_constants(unsigned num_descriptors, unsigned num_constant_words)
{
	auto &builder = spirv_module.get_builder();

	bool array_root_constants = require_arrayed_root_constants();

	// Root constants cannot be dynamically indexed in DXIL, so emit them as members.
	Vector<spv::Id> members((array_root_constants ? 1 : num_constant_words) + num_descriptors);

	// Emit root descriptors as u32x2 to work around missing SGPR promotion on RADV.
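	// The resulting block looks roughly like this (GLSL-style illustration, assuming two
	// root descriptors and non-arrayed constants; member names are placeholders, offsets
	// follow the decorations emitted below):
	//
	//   layout(push_constant) uniform RootConstants
	//   {
	//       uvec2 _0; // root descriptor 0, offset 0
	//       uvec2 _1; // root descriptor 1, offset 8
	//       uint  _2; // root constant word 0, offset 16
	//       // ...
	//   } registers;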
	for (unsigned i = 0; i < num_descriptors; i++)
		members[i] = builder.makeVectorType(builder.makeUintType(32), 2);

	if (array_root_constants)
	{
		spv::Id type_id = builder.makeUintType(32);
		type_id = builder.makeArrayType(type_id, builder.makeUintConstant(num_constant_words), 4);
		builder.addDecoration(type_id, spv::DecorationArrayStride, 4);
		members[num_descriptors] = type_id;
	}
	else
	{
		for (unsigned i = 0; i < num_constant_words; i++)
			members[i + num_descriptors] = builder.makeUintType(32);
	}

	spv::Id type_id = get_struct_type(members, 0, "RootConstants");
	builder.addDecoration(type_id, spv::DecorationBlock);

	for (unsigned i = 0; i < num_descriptors; i++)
		builder.addMemberDecoration(type_id, i, spv::DecorationOffset, sizeof(uint64_t) * i);

	for (unsigned i = 0; i < (array_root_constants ? 1 : num_constant_words); i++)
	{
		builder.addMemberDecoration(type_id, i + num_descriptors, spv::DecorationOffset,
		                            sizeof(uint64_t) * num_descriptors + sizeof(uint32_t) * i);
	}

	if (array_root_constants)
		builder.addMemberName(type_id, num_descriptors, "root_constants_and_tables");

	if (options.inline_ubo_enable)
	{
		root_constant_id = create_variable(spv::StorageClassUniform, type_id, "registers");
		builder.addDecoration(root_constant_id, spv::DecorationDescriptorSet, options.inline_ubo_descriptor_set);
		builder.addDecoration(root_constant_id, spv::DecorationBinding, options.inline_ubo_descriptor_binding);
	}
	else
		root_constant_id = create_variable(spv::StorageClassPushConstant, type_id, "registers");

	root_descriptor_count = num_descriptors;
	root_constant_num_words = num_constant_words;
	root_constant_arrayed = array_root_constants;
}

static bool execution_model_is_ray_tracing(spv::ExecutionModel model)
{
	switch (model)
	{
	case spv::ExecutionModelRayGenerationKHR:
	case spv::ExecutionModelCallableKHR:
	case spv::ExecutionModelIntersectionKHR:
	case spv::ExecutionModelMissKHR:
	case spv::ExecutionModelClosestHitKHR:
	case spv::ExecutionModelAnyHitKHR:
		return true;

	default:
		return false;
	}
}

spv::Id Converter::Impl::emit_shader_record_buffer_block_type(bool physical_storage)
{
	if (local_root_signature.empty())
		return 0;

	auto &builder = spirv_module.get_builder();
	spv::Id type_id;
	Vector<spv::Id> member_types;
	Vector<uint32_t> offsets;
	member_types.reserve(local_root_signature.size());
	offsets.reserve(local_root_signature.size());
	shader_record_buffer_types.reserve(local_root_signature.size());
	uint32_t current_offset = 0;

	for (auto &elem : local_root_signature)
	{
		switch (elem.type)
		{
		case LocalRootSignatureType::Constants:
		{
			spv::Id array_size_id = builder.makeUintConstant(elem.constants.num_words);
			spv::Id u32_type = builder.makeUintType(32);
			spv::Id member_type_id = builder.makeArrayType(u32_type, array_size_id, 4);
			builder.addDecoration(member_type_id, spv::DecorationArrayStride, 4);
			member_types.push_back(member_type_id);
			offsets.push_back(current_offset);
			current_offset += 4 * elem.constants.num_words;
			shader_record_buffer_types.push_back(member_type_id);
			break;
		}

		case LocalRootSignatureType::Descriptor:
		{
			// A 64-bit integer which we will bitcast to a physical storage buffer later.
			// Emit it as u32x2 as otherwise we don't get SGPR promotion on ACO as of right now.
spv::Id member_type_id = builder.makeVectorType(builder.makeUintType(32), 2); member_types.push_back(member_type_id); current_offset = (current_offset + 7) & ~7; offsets.push_back(current_offset); current_offset += 8; shader_record_buffer_types.push_back(member_type_id); break; } case LocalRootSignatureType::Table: { spv::Id member_type_id = builder.makeVectorType(builder.makeUintType(32), 2); member_types.push_back(member_type_id); current_offset = (current_offset + 7) & ~7; offsets.push_back(current_offset); current_offset += 8; shader_record_buffer_types.push_back(member_type_id); break; } default: return false; } } type_id = get_struct_type(member_types, 0, "SBTBlock"); builder.addDecoration(type_id, spv::DecorationBlock); for (size_t i = 0; i < local_root_signature.size(); i++) { builder.addMemberDecoration(type_id, i, spv::DecorationOffset, offsets[i]); if (physical_storage) builder.addMemberDecoration(type_id, i, spv::DecorationNonWritable); } return type_id; } bool Converter::Impl::emit_shader_record_buffer() { spv::Id type_id = emit_shader_record_buffer_block_type(false); if (type_id) shader_record_buffer_id = create_variable(spv::StorageClassShaderRecordBufferKHR, type_id, "SBT"); return true; } static bool local_root_signature_matches(const LocalRootSignatureEntry &entry, ResourceClass resource_class, uint32_t space, uint32_t binding, DescriptorTableEntry &local_table_entry) { switch (entry.type) { case LocalRootSignatureType::Constants: return resource_class == ResourceClass::CBV && entry.constants.register_space == space && entry.constants.register_index == binding; case LocalRootSignatureType::Descriptor: return entry.descriptor.type == resource_class && entry.descriptor.register_space == space && entry.descriptor.register_index == binding; case LocalRootSignatureType::Table: for (auto &table_entry : entry.table_entries) { if (table_entry.type == resource_class && table_entry.register_space == space && table_entry.register_index <= binding && ((table_entry.num_descriptors_in_range == ~0u) || ((binding - table_entry.register_index) < table_entry.num_descriptors_in_range))) { local_table_entry = table_entry; return true; } } return false; default: return false; } } int Converter::Impl::get_local_root_signature_entry(ResourceClass resource_class, uint32_t space, uint32_t binding, DescriptorTableEntry &local_table_entry) const { auto itr = std::find_if(local_root_signature.begin(), local_root_signature.end(), [&](const LocalRootSignatureEntry &entry) { return local_root_signature_matches(entry, resource_class, space, binding, local_table_entry); }); if (itr != local_root_signature.end()) return int(itr - local_root_signature.begin()); else return -1; } bool Converter::Impl::emit_resources_global_mapping() { auto &module = bitcode_parser.get_module(); auto *resource_meta = module.getNamedMetadata("dx.resources"); if (!resource_meta) return true; auto *metas = resource_meta->getOperand(0); if (metas->getOperand(0)) if (!emit_resources_global_mapping(DXIL::ResourceType::SRV, llvm::dyn_cast(metas->getOperand(0)))) return false; if (metas->getOperand(1)) if (!emit_resources_global_mapping(DXIL::ResourceType::UAV, llvm::dyn_cast(metas->getOperand(1)))) return false; if (metas->getOperand(2)) if (!emit_resources_global_mapping(DXIL::ResourceType::CBV, llvm::dyn_cast(metas->getOperand(2)))) return false; if (metas->getOperand(3)) if (!emit_resources_global_mapping(DXIL::ResourceType::Sampler, llvm::dyn_cast(metas->getOperand(3)))) return false; return true; } void 
Converter::Impl::get_shader_model(const llvm::Module &module, String *model, uint32_t *major, uint32_t *minor) { auto *resource_meta = module.getNamedMetadata("dx.shaderModel"); if (!resource_meta) { if (major) *major = 6; if (minor) *minor = 0; if (model) model->clear(); } else { auto *meta = resource_meta->getOperand(0); if (model) *model = get_string_metadata(meta, 0); if (major) *major = get_constant_metadata(meta, 1); if (minor) *minor = get_constant_metadata(meta, 2); } } Converter::Impl::RawBufferMeta Converter::Impl::get_raw_buffer_meta(DXIL::ResourceType resource_type, unsigned meta_index) { auto &module = bitcode_parser.get_module(); auto *resource_meta = module.getNamedMetadata("dx.resources"); if (!resource_meta) return { DXIL::ResourceKind::Invalid, 0 }; auto *metas = resource_meta->getOperand(0); auto &resource_list = metas->getOperand(uint32_t(resource_type)); if (!resource_list) return { DXIL::ResourceKind::Invalid, 0 }; auto *entries = llvm::cast(resource_list); unsigned num_entries = entries->getNumOperands(); for (unsigned i = 0; i < num_entries; i++) { auto *entry = llvm::cast(entries->getOperand(i)); if (get_constant_metadata(entry, 0) == meta_index) { RawBufferMeta meta = {}; meta.kind = DXIL::ResourceKind(get_constant_metadata(entry, 6)); unsigned tag_index = resource_type == DXIL::ResourceType::SRV ? 8 : 10; llvm::MDNode *tags = nullptr; if (entry->getNumOperands() > tag_index && entry->getOperand(tag_index)) tags = llvm::dyn_cast(entry->getOperand(tag_index)); if (tags) meta.stride = get_constant_metadata(tags, 1); return meta; } } return { DXIL::ResourceKind::Invalid, 0 }; } uint32_t Converter::Impl::find_binding_meta_index(uint32_t binding_range_lo, uint32_t binding_range_hi, uint32_t binding_space, DXIL::ResourceType resource_type) { auto &module = bitcode_parser.get_module(); auto *resource_meta = module.getNamedMetadata("dx.resources"); if (!resource_meta) return UINT32_MAX; auto *metas = resource_meta->getOperand(0); auto &resource_list = metas->getOperand(uint32_t(resource_type)); if (!resource_list) return UINT32_MAX; auto *entries = llvm::cast(resource_list); unsigned num_entries = entries->getNumOperands(); for (unsigned i = 0; i < num_entries; i++) { auto *entry = llvm::cast(entries->getOperand(i)); uint32_t index = get_constant_metadata(entry, 0); uint32_t bind_space = get_constant_metadata(entry, 3); uint32_t bind_register = get_constant_metadata(entry, 4); uint32_t range_size = get_constant_metadata(entry, 5); if (binding_space != bind_space) continue; if (binding_range_lo >= bind_register && (range_size == UINT32_MAX || (binding_range_hi < bind_register + range_size))) { return index; } } return UINT32_MAX; } bool Converter::Impl::emit_descriptor_heap_size_ubo() { spv::Id u32_type = builder().makeUintType(32); spv::Id block_type = builder().makeStructType({ u32_type }, "DescriptorHeapSizeUBO"); builder().addMemberName(block_type, 0, "count"); builder().addDecoration(block_type, spv::DecorationBlock); builder().addMemberDecoration(block_type, 0, spv::DecorationOffset, 0); auto &mapping = options.meta_descriptor_mappings[int(MetaDescriptor::ResourceDescriptorHeapSize)]; spv::Id var_id = create_variable(spv::StorageClassUniform, block_type, "DescriptorHeapSize"); builder().addDecoration(var_id, spv::DecorationDescriptorSet, mapping.desc_set); builder().addDecoration(var_id, spv::DecorationBinding, mapping.desc_binding); instrumentation.descriptor_heap_size_var_id = var_id; return true; } bool Converter::Impl::emit_descriptor_heap_introspection_buffer() 
bool Converter::Impl::emit_descriptor_heap_introspection_buffer()
{
	if (instrumentation.descriptor_heap_introspection_var_id != 0)
		return true;

	// We need to know the size of the descriptor heap. Rather than passing this
	// through a separate descriptor, we can just query the SSBO size of the
	// side-band SSBO. It is designed to have a size equal to the descriptor heap.
	// It is somewhat hacky that we can ask for a global heap of RTAS, which gets us this descriptor.
	VulkanSRVBinding vulkan_binding = {};
	auto &mapping = options.meta_descriptor_mappings[int(MetaDescriptor::RawDescriptorHeapView)];

	if (mapping.kind != MetaDescriptorKind::ReadonlySSBO &&
	    mapping.kind != MetaDescriptorKind::UBOContainingBDA &&
	    mapping.kind != MetaDescriptorKind::Invalid)
		return false;

	bool use_full_descriptor = mapping.kind != MetaDescriptorKind::UBOContainingBDA;

	if (mapping.kind == MetaDescriptorKind::Invalid)
	{
		// Legacy proxy. The RTAS heap does what we want in the legacy model.
		D3DBinding d3d_binding = {
			get_remapping_stage(execution_model),
			DXIL::ResourceKind::RTAccelerationStructure,
			0, UINT32_MAX, UINT32_MAX, UINT32_MAX, 0,
		};
		if (!resource_mapping_iface->remap_srv(d3d_binding, vulkan_binding))
			return false;

		if (vulkan_binding.buffer_binding.descriptor_type != VulkanDescriptorType::SSBO &&
		    vulkan_binding.buffer_binding.descriptor_type != VulkanDescriptorType::Identity)
		{
			LOGE("Dummy SSBO must be an SSBO.\n");
			return false;
		}
	}
	else
	{
		vulkan_binding.buffer_binding.descriptor_set = mapping.desc_set;
		vulkan_binding.buffer_binding.binding = mapping.desc_binding;
	}

	if (options.physical_address_descriptor_stride == 0)
	{
		LOGE("physical_address_descriptor_stride must be set.\n");
		return false;
	}

	spv::Id u32_type = builder().makeUintType(32);
	uint32_t elems = options.physical_address_descriptor_stride;
	if (options.instruction_instrumentation.enabled || !use_full_descriptor)
		u32_type = builder().makeVectorType(u32_type, 2);
	else
		elems *= 2;

	spv::Id u32_array_type = builder().makeArrayType(u32_type, builder().makeUintConstant(elems), 0);
	builder().addDecoration(u32_array_type, spv::DecorationArrayStride,
	                        (options.instruction_instrumentation.enabled || !use_full_descriptor) ? 8 : 4);
	spv::Id inner_struct_type = get_struct_type({ u32_array_type }, 0, "DescriptorHeapRawPayload");
	builder().addMemberDecoration(inner_struct_type, 0, spv::DecorationOffset, 0);
	spv::Id inner_struct_array_type = builder().makeRuntimeArray(inner_struct_type);
	builder().addDecoration(inner_struct_array_type, spv::DecorationArrayStride,
	                        8u * options.physical_address_descriptor_stride);

	bool sync_val = options.instruction_instrumentation.enabled &&
	                options.instruction_instrumentation.type ==
	                InstructionInstrumentationType::BufferSynchronizationValidation;

	spv::Id block_type_id = get_struct_type(
	    { inner_struct_array_type }, 0,
	    use_full_descriptor ? "DescriptorHeapRobustnessSSBO" : "DescriptorHeapRawBlock");
	builder().addDecoration(block_type_id, spv::DecorationBlock);
	builder().addMemberDecoration(block_type_id, 0, spv::DecorationOffset, 0);
	if (!sync_val)
	{
		builder().addMemberDecoration(block_type_id, 0, spv::DecorationNonWritable);
		if (use_full_descriptor)
			builder().addMemberDecoration(block_type_id, 0, spv::DecorationNonReadable);
	}
	builder().addMemberName(block_type_id, 0, "descriptors");

	spv::Id var_id;
	if (use_full_descriptor)
	{
		var_id = create_variable(spv::StorageClassStorageBuffer, block_type_id, "DescriptorHeapRobustness");
	}
	else
	{
		// Wrap the descriptor as a plain BDA.
		spv::Id ptr_type = builder().makePointer(spv::StorageClassPhysicalStorageBuffer, block_type_id);
		spv::Id ubo_block_type = builder().makeStructType({ ptr_type }, "DescriptorHeapRawPayloadPtr");
		builder().addMemberName(ubo_block_type, 0, "ptr");
		builder().addMemberDecoration(ubo_block_type, 0, spv::DecorationOffset, 0);
		builder().addDecoration(ubo_block_type, spv::DecorationBlock);
		var_id = create_variable(spv::StorageClassUniform, ubo_block_type, "DescriptorHeapRaw");
		instrumentation.descriptor_heap_introspection_block_ptr_type_id = ptr_type;
	}

	builder().addDecoration(var_id, spv::DecorationDescriptorSet, vulkan_binding.buffer_binding.descriptor_set);
	builder().addDecoration(var_id, spv::DecorationBinding, vulkan_binding.buffer_binding.binding);
	instrumentation.descriptor_heap_introspection_var_id = var_id;
	instrumentation.descriptor_heap_introspection_is_bda = !use_full_descriptor;

	if (sync_val)
	{
		instrumentation.invocation_id_var_id =
		    create_variable(spv::StorageClassPrivate, builder().makeUintType(32), "InvocationID");
	}

	return true;
}
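// Rough GLSL equivalents of the two shapes emitted above, with N = physical_address_descriptor_stride
// (illustrative only; the instrumented variant uses uvec2 raw[N] with stride 8 instead):
//
//   // use_full_descriptor path:
//   layout(set = S, binding = B) readonly buffer DescriptorHeapRobustnessSSBO {
//       struct { uint raw[2 * N]; } descriptors[];
//   };
//
//   // UBOContainingBDA path: the same runtime array is reached through a BDA pointer in a UBO:
//   layout(set = S, binding = B) uniform DescriptorHeapRawPayloadPtr { DescriptorHeapRawBlock *ptr; };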
bool Converter::Impl::emit_global_heaps()
{
	Vector<AnnotateHandleReference *> annotations;
	for (auto &use : llvm_annotate_handle_uses)
		annotations.push_back(&use.second);

	// Ensure reproducible codegen since we iterate over an unordered map.
	std::sort(annotations.begin(), annotations.end(),
	          [](const AnnotateHandleReference *a, const AnnotateHandleReference *b) {
		          return a->ordinal < b->ordinal;
	          });

	for (auto *annotation : annotations)
	{
		BindlessInfo info = {};
		auto actual_component_type = DXIL::ComponentType::U32;
		info.format = spv::ImageFormatUnknown;

		if (annotation->resource_type != DXIL::ResourceType::CBV &&
		    annotation->resource_kind != DXIL::ResourceKind::RawBuffer &&
		    annotation->resource_kind != DXIL::ResourceKind::StructuredBuffer)
		{
			actual_component_type = normalize_component_type(annotation->component_type);
			if (annotation->tracking.has_atomic_64bit)
			{
				// The component type in DXIL is u32, even if the resource itself is u64 in meta reflection data ...
				// This is also the case for signed components. Always use R64UI here.
				actual_component_type = DXIL::ComponentType::U64;
			}
		}
		else if (annotation->resource_type == DXIL::ResourceType::UAV)
		{
			info.format = spv::ImageFormatR32ui;
		}

		auto effective_component_type = get_effective_typed_resource_type(actual_component_type);
		info.type = annotation->resource_type;
		info.component = effective_component_type;
		info.kind = annotation->resource_kind;
		info.relaxed_precision =
		    actual_component_type != effective_component_type && component_type_is_16bit(actual_component_type);

		if (info.type == DXIL::ResourceType::UAV)
		{
			// See emit_uavs() for details around coherent and memory model shenanigans ...
			if (annotation->coherent)
				execution_mode_meta.declares_globallycoherent_uav = true;
			if (annotation->rov)
				execution_mode_meta.declares_rov = true;

			// Do not attempt to track read and write here to figure out if this resource
			// in particular needs to be coherent. It's plausible that the write and read can happen
			// across two different accesses to ResourceDescriptorHeap[]. Don't take any chances here ...
			if (shader_analysis.require_uav_thread_group_coherence &&
			    execution_mode_meta.memory_model == spv::MemoryModelGLSL450)
			{
				annotation->coherent = true;
			}

			if (annotation->resource_kind == DXIL::ResourceKind::StructuredBuffer ||
			    annotation->resource_kind == DXIL::ResourceKind::RawBuffer)
			{
				// In case there is aliasing through different declarations,
				// we cannot emit NonWritable or NonReadable safely. Assume full read-write.
				// Be a bit careful with typed resources, since read-write + typed is not always supported.
				annotation->tracking.has_read = true;
				annotation->tracking.has_written = true;
			}

			info.uav_coherent = annotation->coherent || annotation->rov;
			info.uav_read = annotation->tracking.has_read;
			info.uav_written = annotation->tracking.has_written;

			if (!get_uav_image_format(annotation->resource_kind, actual_component_type,
			                          annotation->tracking, info.format))
			{
				return false;
			}
		}

		unsigned stride = annotation->stride;
		unsigned alignment = info.kind == DXIL::ResourceKind::RawBuffer ? 16 : (stride & -int(stride));
		D3DBinding d3d_binding = {
			get_remapping_stage(execution_model), info.kind, 0, UINT32_MAX, UINT32_MAX, UINT32_MAX, alignment,
		};
		VulkanBinding vulkan_binding = {};
		bool remap_success = false;

		if (resource_mapping_iface)
		{
			switch (info.type)
			{
			case DXIL::ResourceType::SRV:
			{
				VulkanSRVBinding vulkan_srv_binding = {};
				remap_success = resource_mapping_iface->remap_srv(d3d_binding, vulkan_srv_binding);
				vulkan_binding = vulkan_srv_binding.buffer_binding;
				if (!get_ssbo_offset_buffer_id(annotation->offset_buffer_id, vulkan_srv_binding.buffer_binding,
				                               vulkan_srv_binding.offset_binding, annotation->resource_kind,
				                               alignment))
				{
					return false;
				}
				break;
			}

			case DXIL::ResourceType::UAV:
			{
				VulkanUAVBinding vulkan_uav_binding = {};
				D3DUAVBinding d3d_uav_binding = {};
				d3d_uav_binding.binding = d3d_binding;
				d3d_uav_binding.counter = annotation->counter;
				remap_success = resource_mapping_iface->remap_uav(d3d_uav_binding, vulkan_uav_binding);
				vulkan_binding = vulkan_uav_binding.buffer_binding;
				if (!get_ssbo_offset_buffer_id(annotation->offset_buffer_id, vulkan_uav_binding.buffer_binding,
				                               vulkan_uav_binding.offset_binding, annotation->resource_kind,
				                               alignment))
				{
					return false;
				}

				if (annotation->counter)
				{
					auto &counter_binding = vulkan_uav_binding.counter_binding;
					BindlessInfo counter_info = {};
					annotation->counter_reference.base_resource_is_array = true;
					annotation->counter_reference.push_constant_member = UINT32_MAX;
					annotation->counter_reference.stride = 4;
					annotation->counter_reference.bindless = true;

					counter_info.type = DXIL::ResourceType::UAV;
					counter_info.component = DXIL::ComponentType::U32;
					counter_info.desc_set = counter_binding.descriptor_set;
					counter_info.binding = counter_binding.binding;

					if (counter_binding.descriptor_type == VulkanDescriptorType::SSBO)
					{
						counter_info.kind = DXIL::ResourceKind::RawBuffer;
						counter_info.counters = true;
					}
					else if (options.physical_storage_buffer &&
					         counter_binding.descriptor_type != VulkanDescriptorType::TexelBuffer)
					{
						counter_info.kind = DXIL::ResourceKind::Invalid;
						counter_info.counters = true;
					}
					else
					{
						counter_info.kind = DXIL::ResourceKind::TypedBuffer;
						counter_info.uav_read = true;
						counter_info.uav_written = true;
						counter_info.uav_coherent = false;
						counter_info.format = spv::ImageFormatR32ui;
					}

					annotation->counter_reference.resource_kind = counter_info.kind;
					annotation->counter_reference.var_id = create_bindless_heap_variable(counter_info);
				}
				break;
			}

			case DXIL::ResourceType::CBV:
			{
				VulkanCBVBinding vulkan_cbv_binding = {};
				remap_success = resource_mapping_iface->remap_cbv(d3d_binding, vulkan_cbv_binding);
				if (vulkan_cbv_binding.push_constant)
				{
					LOGE("Cannot use push constants for SM 6.6 bindless.\n");
					return false;
				}
				vulkan_binding = vulkan_cbv_binding.buffer;
				vulkan_binding.descriptor_type = VulkanDescriptorType::UBO;
				break;
			}

			case DXIL::ResourceType::Sampler:
				remap_success = resource_mapping_iface->remap_sampler(d3d_binding, vulkan_binding);
				break;
			}
		}

		if (!remap_success)
			return false;

		if (!vulkan_binding.bindless.use_heap)
		{
			LOGE("SM 6.6 bindless references must be bindless.\n");
			return false;
		}

		AliasedAccess aliased_access;
		if (!analyze_aliased_access(annotation->tracking, vulkan_binding.descriptor_type, aliased_access))
			return false;

		info.desc_set = vulkan_binding.descriptor_set;
		info.binding = vulkan_binding.binding;
		info.descriptor_type = vulkan_binding.descriptor_type;
		info.aliased = aliased_access.requires_alias_decoration;
		info.debug.stride = annotation->stride;

		annotation->reference.bindless = true;
		annotation->reference.base_resource_is_array = true;
		annotation->reference.push_constant_member = UINT32_MAX;
		annotation->reference.stride = annotation->stride;
		annotation->reference.resource_kind = annotation->resource_kind;
		annotation->reference.coherent = annotation->coherent || annotation->rov;
		annotation->reference.rov = annotation->rov;

		if (execution_mode_meta.memory_model == spv::MemoryModelVulkan)
		{
			annotation->reference.vkmm.non_private = info.type == DXIL::ResourceType::UAV;
			annotation->reference.vkmm.auto_visibility = annotation->coherent || annotation->rov;
		}

		if (aliased_access.requires_alias_decoration)
		{
			annotation->reference.var_alias_group =
			    create_bindless_heap_variable_alias_group(info, aliased_access.raw_declarations);
		}
		else if (aliased_access.override_primary_component_types)
		{
			auto tmp_info = info;
			tmp_info.component = aliased_access.primary_component_type;
			tmp_info.raw_vecsize = aliased_access.primary_raw_vecsize;
			annotation->reference.var_id = create_bindless_heap_variable(tmp_info);
		}
		else
			annotation->reference.var_id = create_bindless_heap_variable(info);

		annotation->reference.aliased = aliased_access.requires_alias_decoration;
	}

	return true;
}
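// Conceptually, the loop above is what lowers SM 6.6 style heap access, e.g. (illustrative HLSL):
//
//   RWStructuredBuffer<float4> buf = ResourceDescriptorHeap[NonUniformResourceIndex(index)];
//
// Each unique dx.op.annotateHandle use becomes one AnnotateHandleReference, and its
// reference.var_id ends up pointing at a runtime-sized descriptor array which the
// heap index selects into at access time.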
bool Converter::Impl::emit_ray_query_globals()
{
	if (shader_analysis.ray_query.uses_non_direct_indexing)
	{
		auto &b = builder();
		spv::Id type_id = b.makeRayQueryType();
		if (shader_analysis.ray_query.uses_divergent_handles)
		{
			type_id = b.makeArrayType(
			    type_id, b.makeUintConstant(shader_analysis.ray_query.num_ray_query_alloca), 0);
		}
		ray_query.global_query_objects_id = create_variable(spv::StorageClassPrivate, type_id, "RayQueryHeap");
	}
	return true;
}

bool Converter::Impl::emit_resources()
{
	unsigned num_root_descriptors = 0;
	unsigned num_root_constant_words = 0;

	if (resource_mapping_iface)
	{
		num_root_descriptors = resource_mapping_iface->get_root_descriptor_count();
		num_root_constant_words = resource_mapping_iface->get_root_constant_word_count();
	}

	if (num_root_constant_words != 0 || num_root_descriptors != 0)
		emit_root_constants(num_root_descriptors, num_root_constant_words);

	if (execution_model_is_ray_tracing(execution_model))
		if (!emit_shader_record_buffer())
			return false;

	if (!emit_global_heaps())
		return false;

	if (options.descriptor_heap_robustness)
	{
		auto &mapping = options.meta_descriptor_mappings[int(MetaDescriptor::ResourceDescriptorHeapSize)];
		if (mapping.kind == MetaDescriptorKind::UBOContainingConstant)
		{
			// Use legacy path.
			if (!emit_descriptor_heap_size_ubo())
				return false;
		}
		else
		{
			if (!emit_descriptor_heap_introspection_buffer())
				return false;
		}
	}

	if (options.instruction_instrumentation.enabled &&
	    (options.instruction_instrumentation.type == InstructionInstrumentationType::ExpectAssume ||
	     options.instruction_instrumentation.type ==
	     InstructionInstrumentationType::BufferSynchronizationValidation))
	{
		// Failure is not a big deal.
		emit_descriptor_heap_introspection_buffer();
	}

	auto &module = bitcode_parser.get_module();
	auto *resource_meta = module.getNamedMetadata("dx.resources");
	if (!resource_meta)
		return true;

	auto *metas = resource_meta->getOperand(0);
	llvm::MDNode *reflection_metas = nullptr;

	if (bitcode_reflection_parser)
	{
		auto &reflection_module = bitcode_reflection_parser->get_module();
		auto *reflection_resource_meta = reflection_module.getNamedMetadata("dx.resources");
		if (reflection_resource_meta)
			reflection_metas = reflection_resource_meta->getOperand(0);
	}

	const llvm::MDNode *reflection_type_metas[4] = {};
	const llvm::MDNode *type_metas[4] = {};

	for (unsigned i = 0; i < 4; i++)
	{
		if (metas->getOperand(i))
		{
			type_metas[i] = llvm::dyn_cast<llvm::MDNode>(metas->getOperand(i));
			if (reflection_metas)
				reflection_type_metas[i] = llvm::dyn_cast<llvm::MDNode>(reflection_metas->getOperand(i));
		}
	}

	if (type_metas[0])
		if (!emit_srvs(type_metas[0], reflection_type_metas[0]))
			return false;
	if (type_metas[1])
		if (!emit_uavs(type_metas[1], reflection_type_metas[1]))
			return false;
	if (type_metas[2])
		if (!emit_cbvs(type_metas[2], reflection_type_metas[2]))
			return false;
	if (type_metas[3])
		if (!emit_samplers(type_metas[3], reflection_type_metas[3]))
			return false;

	for (auto &alloc : alloca_tracking)
	{
		// Now that we have emitted resources, we can determine which alloca -> CBV punchthroughs to accept.
		if (!analyze_alloca_cbv_forwarding_post_resource_emit(*this, alloc.second))
			return false;
	}

	if (!emit_ray_query_globals())
		return false;

	return true;
}

void Converter::Impl::scan_resources(ResourceRemappingInterface *iface, const LLVMBCParser &bitcode_parser)
{
	auto &module = bitcode_parser.get_module();
	auto *resource_meta = module.getNamedMetadata("dx.resources");
	if (!resource_meta)
		return;

	auto *metas = resource_meta->getOperand(0);
	auto stage = get_shader_stage(bitcode_parser);

	if (metas->getOperand(0))
		if (!scan_srvs(iface, llvm::dyn_cast<llvm::MDNode>(metas->getOperand(0)), stage))
			return;
	if (metas->getOperand(1))
		if (!scan_uavs(iface, llvm::dyn_cast<llvm::MDNode>(metas->getOperand(1)), stage))
			return;
	if (metas->getOperand(2))
		if (!scan_cbvs(iface, llvm::dyn_cast<llvm::MDNode>(metas->getOperand(2)), stage))
			return;
	if (metas->getOperand(3))
		if (!scan_samplers(iface, llvm::dyn_cast<llvm::MDNode>(metas->getOperand(3)), stage))
			return;
}

ShaderStage Converter::Impl::get_remapping_stage(spv::ExecutionModel execution_model)
{
	switch (execution_model)
	{
	case spv::ExecutionModelVertex:
		return ShaderStage::Vertex;
	case spv::ExecutionModelTessellationControl:
		return ShaderStage::Hull;
	case spv::ExecutionModelTessellationEvaluation:
		return ShaderStage::Domain;
	case spv::ExecutionModelGeometry:
		return ShaderStage::Geometry;
	case spv::ExecutionModelFragment:
		return ShaderStage::Pixel;
	case spv::ExecutionModelGLCompute:
		return ShaderStage::Compute;
	case spv::ExecutionModelIntersectionKHR:
		return ShaderStage::Intersection;
	case spv::ExecutionModelClosestHitKHR:
		return ShaderStage::ClosestHit;
	case spv::ExecutionModelMissKHR:
		return ShaderStage::Miss;
	case spv::ExecutionModelAnyHitKHR:
		return ShaderStage::AnyHit;
	case spv::ExecutionModelRayGenerationKHR:
		return ShaderStage::RayGeneration;
	case spv::ExecutionModelCallableKHR:
		return ShaderStage::Callable;
	case spv::ExecutionModelTaskEXT:
		return ShaderStage::Amplification;
	case spv::ExecutionModelMeshEXT:
		return ShaderStage::Mesh;
	default:
		return ShaderStage::Unknown;
	}
}

static inline float half_to_float(uint16_t u16_value)
{
	// Based on the GLM implementation.
	int s = (u16_value >> 15) & 0x1;
	int e = (u16_value >> 10) & 0x1f;
	int m = (u16_value >> 0) & 0x3ff;

	union
	{
		float f32;
		uint32_t u32;
	} u;

	if (e == 0)
	{
		if (m == 0)
		{
			// +/- 0.
			u.u32 = uint32_t(s) << 31;
			return u.f32;
		}
		else
		{
			// Denormal, renormalize into the implied-leading-one form.
			while ((m & 0x400) == 0)
			{
				m <<= 1;
				e--;
			}

			e++;
			m &= ~0x400;
		}
	}
	else if (e == 31)
	{
		if (m == 0)
		{
			// +/- Inf.
			u.u32 = (uint32_t(s) << 31) | 0x7f800000u;
			return u.f32;
		}
		else
		{
			// NaN, preserve the payload bits.
			u.u32 = (uint32_t(s) << 31) | 0x7f800000u | (m << 13);
			return u.f32;
		}
	}

	e += 127 - 15;
	m <<= 13;

	u.u32 = (uint32_t(s) << 31) | (e << 23) | m;
	return u.f32;
}
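// Worked example for half_to_float(): 0x3C00 decodes as s = 0, e = 15, m = 0;
// the exponent is rebiased to 15 + (127 - 15) = 127, i.e. 2^0, so the result is 1.0f.
// A denormal such as 0x0001 is renormalized by the shift loop and converts to 2^-24.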
spv::Id Converter::Impl::get_padded_constant_array(spv::Id padded_type_id, const llvm::Constant *constant)
{
	auto &builder = spirv_module.get_builder();
	assert(constant->getType()->getTypeID() == llvm::Type::TypeID::ArrayTyID);
	Vector<spv::Id> constituents;

	if (llvm::isa<llvm::ConstantAggregateZero>(constant))
	{
		return builder.makeNullConstant(padded_type_id);
	}
	else if (auto *agg = llvm::dyn_cast<llvm::ConstantAggregate>(constant))
	{
		constituents.reserve(agg->getNumOperands() + 1);
		for (unsigned i = 0; i < agg->getNumOperands(); i++)
		{
			llvm::Constant *c = agg->getOperand(i);
			if (const auto *undef = llvm::dyn_cast<llvm::UndefValue>(c))
				constituents.push_back(get_id_for_undef_constant(undef));
			else
				constituents.push_back(get_id_for_constant(c, 0));
		}
	}
	else if (auto *array = llvm::dyn_cast<llvm::ConstantDataArray>(constant))
	{
		constituents.reserve(array->getType()->getArrayNumElements() + 1);
		for (unsigned i = 0; i < array->getNumElements(); i++)
		{
			llvm::Constant *c = array->getElementAsConstant(i);
			if (const auto *undef = llvm::dyn_cast<llvm::UndefValue>(c))
				constituents.push_back(get_id_for_undef_constant(undef));
			else
				constituents.push_back(get_id_for_constant(c, 0));
		}
	}
	else
		return 0;

	// Pad out with one extra null element at the end.
	constituents.push_back(builder.makeNullConstant(get_type_id(constant->getType()->getArrayElementType())));
	return builder.makeCompositeConstant(padded_type_id, constituents);
}

spv::Id Converter::Impl::get_id_for_constant(const llvm::Constant *constant, unsigned forced_width)
{
	auto &builder = spirv_module.get_builder();

	switch (constant->getType()->getTypeID())
	{
	case llvm::Type::TypeID::HalfTyID:
	{
		auto *fp = llvm::cast<llvm::ConstantFP>(constant);
		auto f16 = uint16_t(fp->getValueAPF().bitcastToAPInt().getZExtValue());
		if (support_native_fp16_operations())
			return builder.makeFloat16Constant(f16);
		else
			return builder.makeFloatConstant(half_to_float(f16));
	}

	case llvm::Type::TypeID::FloatTyID:
	{
		auto *fp = llvm::cast<llvm::ConstantFP>(constant);
		return builder.makeFloatConstant(fp->getValueAPF().convertToFloat());
	}

	case llvm::Type::TypeID::DoubleTyID:
	{
		auto *fp = llvm::cast<llvm::ConstantFP>(constant);
		return builder.makeDoubleConstant(fp->getValueAPF().convertToDouble());
	}

	case llvm::Type::TypeID::IntegerTyID:
	{
		unsigned integer_width = forced_width ? forced_width : constant->getType()->getIntegerBitWidth();
		int physical_width = physical_integer_bit_width(integer_width);

		switch (physical_width)
		{
		case 1:
			return builder.makeBoolConstant(constant->getUniqueInteger().getZExtValue() != 0);
		case 16:
			return builder.makeUint16Constant(constant->getUniqueInteger().getZExtValue());
		case 32:
			return builder.makeUintConstant(constant->getUniqueInteger().getZExtValue());
		case 64:
			return builder.makeUint64Constant(constant->getUniqueInteger().getZExtValue());
		default:
			return 0;
		}
	}

	case llvm::Type::TypeID::VectorTyID:
	case llvm::Type::TypeID::ArrayTyID:
	case llvm::Type::TypeID::StructTyID:
	{
		Vector<spv::Id> constituents;
		spv::Id type_id = get_type_id(constant->getType());

		if (llvm::isa<llvm::ConstantAggregateZero>(constant))
		{
			return builder.makeNullConstant(type_id);
		}
		else if (auto *agg = llvm::dyn_cast<llvm::ConstantAggregate>(constant))
		{
			constituents.reserve(agg->getNumOperands());
			for (unsigned i = 0; i < agg->getNumOperands(); i++)
			{
				llvm::Constant *c = agg->getOperand(i);
				if (const auto *undef = llvm::dyn_cast<llvm::UndefValue>(c))
					constituents.push_back(get_id_for_undef_constant(undef));
				else
					constituents.push_back(get_id_for_constant(c, 0));
			}
		}
		else if (auto *array = llvm::dyn_cast<llvm::ConstantDataArray>(constant))
		{
			constituents.reserve(array->getType()->getArrayNumElements());
			for (unsigned i = 0; i < array->getNumElements(); i++)
			{
				llvm::Constant *c = array->getElementAsConstant(i);
				if (const auto *undef = llvm::dyn_cast<llvm::UndefValue>(c))
					constituents.push_back(get_id_for_undef_constant(undef));
				else
					constituents.push_back(get_id_for_constant(c, 0));
			}
		}
		else if (auto *vec = llvm::dyn_cast<llvm::ConstantDataVector>(constant))
		{
			constituents.reserve(vec->getType()->getVectorNumElements());
			for (unsigned i = 0; i < vec->getNumElements(); i++)
			{
				llvm::Constant *c = vec->getElementAsConstant(i);
				if (const auto *undef = llvm::dyn_cast<llvm::UndefValue>(c))
					constituents.push_back(get_id_for_undef_constant(undef));
				else
					constituents.push_back(get_id_for_constant(c, 0));
			}
		}
		else
			return 0;

		return builder.makeCompositeConstant(type_id, constituents);
	}

	default:
		return 0;
	}
}

spv::Id Converter::Impl::get_id_for_undef(const llvm::UndefValue *undef)
{
	auto &builder = spirv_module.get_builder();
	if (shader_analysis.global_undefs)
		return builder.createUndefinedConstant(get_type_id(undef->getType()));
	else
		return builder.createUndefined(get_type_id(undef->getType()));
}

spv::Id Converter::Impl::get_id_for_undef_constant(const llvm::UndefValue *undef)
{
	auto &builder = spirv_module.get_builder();
	return builder.createUndefinedConstant(get_type_id(undef->getType()));
}

spv::Id Converter::Impl::get_id_for_value(const llvm::Value *value, unsigned forced_width)
{
	assert(value);

	// Constant expressions must be stamped out at every place they are used,
	// since they technically live at global scope.
	// Do not cache this value in the value map.
	if (auto *cexpr = llvm::dyn_cast<llvm::ConstantExpr>(value))
		return build_constant_expression(*this, cexpr);

	auto itr = value_map.find(value);
	if (itr != value_map.end())
		return itr->second;

	spv::Id ret;
	if (auto *undef = llvm::dyn_cast<llvm::UndefValue>(value))
		ret = get_id_for_undef(undef);
	else if (auto *constant = llvm::dyn_cast<llvm::Constant>(value))
		ret = get_id_for_constant(constant, forced_width);
	else
		ret = spirv_module.allocate_id();

	value_map[value] = ret;
	return ret;
}

static llvm::MDNode *get_entry_point_meta(const llvm::Module &module, const char *entry)
{
	auto *ep_meta = module.getNamedMetadata("dx.entryPoints");
	unsigned num_entry_points = ep_meta->getNumOperands();
	for (unsigned i = 0; i < num_entry_points; i++)
	{
		auto *node = ep_meta->getOperand(i);
		if (node)
		{
			auto &func_node = node->getOperand(0);
			if (func_node)
				if (!entry || Converter::entry_point_matches(get_string_metadata(node, 1), entry))
					return node;
		}
	}

	// dxilconv can emit a null hull shader with a non-null patch constant function ... *shrug*
	// I suppose we need to deal with that too.
	if (!entry && num_entry_points)
	{
		auto *node = ep_meta->getOperand(0);
		if (node)
			return node;
	}

	return nullptr;
}

static llvm::MDNode *get_null_entry_point_meta(const llvm::Module &module)
{
	// In DXR, a dummy entry point with a null function pointer owns the shader flags for whatever reason ...
	auto *ep_meta = module.getNamedMetadata("dx.entryPoints");
	unsigned num_entry_points = ep_meta->getNumOperands();
	for (unsigned i = 0; i < num_entry_points; i++)
	{
		auto *node = ep_meta->getOperand(i);
		if (node)
		{
			auto &func_node = node->getOperand(0);
			if (!func_node)
				return node;
		}
	}

	return nullptr;
}

Vector<String> Converter::get_entry_points(const LLVMBCParser &parser)
{
	Vector<String> result;
	auto &module = parser.get_module();
	auto *ep_meta = module.getNamedMetadata("dx.entryPoints");
	unsigned num_entry_points = ep_meta->getNumOperands();
	result.reserve(num_entry_points);

	for (unsigned i = 0; i < num_entry_points; i++)
	{
		auto *node = ep_meta->getOperand(i);
		if (node)
		{
			auto &func_node = node->getOperand(0);
			if (func_node)
				result.push_back(get_string_metadata(node, 1));
		}
	}

	return result;
}

bool Converter::entry_point_matches(const String &mangled, const char *user)
{
	if (is_mangled_entry_point(user))
		return mangled == user;
	else
		return demangle_entry_point(mangled) == user;
}

static String get_entry_point_name(llvm::MDNode *node)
{
	if (!node)
		return {};
	auto &name_node = node->getOperand(1);
	if (name_node)
	{
		auto *str_node = llvm::dyn_cast<llvm::MDString>(name_node);
		if (str_node)
			return get_string_metadata(node, 1);
	}
	return {};
}

static llvm::Function *get_entry_point_function(llvm::MDNode *node)
{
	if (!node)
		return nullptr;
	auto &func_node = node->getOperand(0);
	if (func_node)
		return llvm::dyn_cast<llvm::Function>(llvm::cast<llvm::ValueAsMetadata>(func_node)->getValue());
	else
		return nullptr;
}

static const llvm::MDOperand *get_shader_property_tag(const llvm::MDNode *func_meta, DXIL::ShaderPropertyTag tag)
{
	if (func_meta && func_meta->getNumOperands() >= 5 && func_meta->getOperand(4))
	{
		auto *tag_values = llvm::dyn_cast<llvm::MDNode>(func_meta->getOperand(4));
		unsigned num_pairs = tag_values->getNumOperands() / 2;
		for (unsigned i = 0; i < num_pairs; i++)
			if (tag == static_cast<DXIL::ShaderPropertyTag>(get_constant_metadata(tag_values, 2 * i)))
				return &tag_values->getOperand(2 * i + 1);
	}

	return nullptr;
}

static bool get_execution_model_lib_target(const llvm::Module &module, llvm::MDNode *entry_point_meta)
{
	String model;
	Converter::Impl::get_shader_model(module, &model, nullptr, nullptr);
	return model == "lib";
}

static spv::ExecutionModel get_execution_model(const llvm::Module &module, llvm::MDNode *entry_point_meta)
{
	if (auto *tag = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::ShaderKind))
	{
		auto shader_kind = static_cast<DXIL::ShaderKind>(
		    llvm::cast<llvm::ConstantAsMetadata>(*tag)->getValue()->getUniqueInteger().getZExtValue());

		switch (shader_kind)
		{
		case DXIL::ShaderKind::Pixel:
			return spv::ExecutionModelFragment;
		case DXIL::ShaderKind::Vertex:
			return spv::ExecutionModelVertex;
		case DXIL::ShaderKind::Hull:
			return spv::ExecutionModelTessellationControl;
		case DXIL::ShaderKind::Domain:
			return spv::ExecutionModelTessellationEvaluation;
		case DXIL::ShaderKind::Geometry:
			return spv::ExecutionModelGeometry;
		case DXIL::ShaderKind::Compute:
		case DXIL::ShaderKind::Node:
			return spv::ExecutionModelGLCompute;
		case DXIL::ShaderKind::Amplification:
			return spv::ExecutionModelTaskEXT;
		case DXIL::ShaderKind::Mesh:
			return spv::ExecutionModelMeshEXT;
		case DXIL::ShaderKind::RayGeneration:
			return spv::ExecutionModelRayGenerationKHR;
		case DXIL::ShaderKind::Miss:
			return spv::ExecutionModelMissKHR;
		case DXIL::ShaderKind::ClosestHit:
			return spv::ExecutionModelClosestHitKHR;
		case DXIL::ShaderKind::Callable:
			return spv::ExecutionModelCallableKHR;
		case DXIL::ShaderKind::AnyHit:
			return spv::ExecutionModelAnyHitKHR;
		case DXIL::ShaderKind::Intersection:
			return spv::ExecutionModelIntersectionKHR;
		default:
			break;
		}
	}
	else
	{
		// Non-RT shaders tend to rely on having the shader model set in the shaderModel meta node.
		String model;
		Converter::Impl::get_shader_model(module, &model, nullptr, nullptr);
		if (model == "vs")
			return spv::ExecutionModelVertex;
		else if (model == "ps")
			return spv::ExecutionModelFragment;
		else if (model == "hs")
			return spv::ExecutionModelTessellationControl;
		else if (model == "ds")
			return spv::ExecutionModelTessellationEvaluation;
		else if (model == "gs")
			return spv::ExecutionModelGeometry;
		else if (model == "cs")
			return spv::ExecutionModelGLCompute;
		else if (model == "as")
			return spv::ExecutionModelTaskEXT;
		else if (model == "ms")
			return spv::ExecutionModelMeshEXT;
	}

	return spv::ExecutionModelMax;
}
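// For reference, a typical dx.entryPoints record looks roughly like this in the IR
// (field layout per the DXIL metadata format; the exact node numbers are illustrative):
//
//   !dx.entryPoints = !{!5}
//   !5 = !{void ()* @main, !"main", !6 /* signatures */, !7 /* resources */, !8 /* tag/value pairs */}
//
// get_shader_property_tag() walks the tag/value pairs in operand 4; that is where DXR
// shaders publish ShaderKind, since a "lib" target has no meaningful dx.shaderModel stage.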
spv::Id Converter::Impl::get_type_id(const llvm::Type *type, TypeLayoutFlags flags)
{
	auto &builder = spirv_module.get_builder();
	switch (type->getTypeID())
	{
	case llvm::Type::TypeID::HalfTyID:
		return builder.makeFloatType(support_native_fp16_operations() ? 16 : 32);

	case llvm::Type::TypeID::FloatTyID:
		return builder.makeFloatType(32);

	case llvm::Type::TypeID::DoubleTyID:
		return builder.makeFloatType(64);

	case llvm::Type::TypeID::IntegerTyID:
		if (type->getIntegerBitWidth() == 1)
			return builder.makeBoolType();
		else
		{
			auto width = physical_integer_bit_width(type->getIntegerBitWidth());
			return builder.makeIntegerType(width, false);
		}

	case llvm::Type::TypeID::PointerTyID:
	{
		if (DXIL::AddressSpace(type->getPointerAddressSpace()) != DXIL::AddressSpace::PhysicalNodeIO ||
		    (flags & TYPE_LAYOUT_PHYSICAL_BIT) == 0)
		{
			// Have to deal with this from the outside. Should only be relevant for getelementptr and similar instructions.
			LOGE("Cannot reliably convert LLVM pointer type, we cannot differentiate between Function and Private.\n");
			std::terminate();
		}

		// This is free-flowing BDA in DXIL. We'll deal with it as-is.
		// The main complication is that we have to emit Offset information ourselves.
		spv::Id pointee_type = get_type_id(type->getPointerElementType(), flags);
		return builder.makePointer(spv::StorageClassPhysicalStorageBuffer, pointee_type);
	}

	case llvm::Type::TypeID::ArrayTyID:
	{
		if (type->getArrayNumElements() == 0)
			return 0;

		spv::Id array_size_id;
		spv::Id element_type_id;

		// dxbc2dxil emits broken code for TGSM. It's an array of i8, which is absolute nonsense.
		// It then bitcasts the pointer to i32, which isn't legal either.
		if ((flags & TYPE_LAYOUT_PHYSICAL_BIT) == 0 &&
		    type->getArrayElementType()->getTypeID() == llvm::Type::TypeID::IntegerTyID &&
		    type->getArrayElementType()->getIntegerBitWidth() == 8 &&
		    type->getArrayNumElements() % 4 == 0)
		{
			array_size_id = builder.makeUintConstant(type->getArrayNumElements() / 4);
			element_type_id = builder.makeUintType(32);
		}
		else
		{
			array_size_id = builder.makeUintConstant(type->getArrayNumElements());
			element_type_id = get_type_id(type->getArrayElementType(), flags & ~TYPE_LAYOUT_BLOCK_BIT);
		}

		if ((flags & TYPE_LAYOUT_PHYSICAL_BIT) != 0)
		{
			auto size_stride = get_physical_size_for_type(element_type_id);
			uint32_t stride = size_stride.size;

			// We always use scalar layout.
			for (auto &cached_type : cached_physical_array_types)
				if (cached_type.element_type_id == element_type_id && cached_type.array_size_id == array_size_id)
					return cached_type.id;

			spv::Id array_type_id = builder.makeArrayType(element_type_id, array_size_id, stride);
			builder.addDecoration(array_type_id, spv::DecorationArrayStride, stride);
			cached_physical_array_types.push_back({ array_type_id, element_type_id, array_size_id });
			return array_type_id;
		}
		else
		{
			// The glslang emitter deduplicates.
			return builder.makeArrayType(element_type_id, array_size_id, 0);
		}
	}

	case llvm::Type::TypeID::StructTyID:
	{
		auto *struct_type = llvm::cast<llvm::StructType>(type);
		Vector<spv::Id> member_types;
		member_types.reserve(struct_type->getStructNumElements());
		for (unsigned i = 0; i < struct_type->getStructNumElements(); i++)
			member_types.push_back(get_type_id(struct_type->getStructElementType(i), flags & ~TYPE_LAYOUT_BLOCK_BIT));
		return get_struct_type(member_types, flags, "");
	}

	case llvm::Type::TypeID::VectorTyID:
	{
		auto *vec_type = llvm::cast<llvm::VectorType>(type);
		return builder.makeVectorType(get_type_id(vec_type->getElementType()), vec_type->getVectorNumElements());
	}

	case llvm::Type::TypeID::VoidTyID:
		return builder.makeVoidType();

	default:
		return 0;
	}
}
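// Example of the dxbc2dxil TGSM workaround above: a groupshared array which dxbc2dxil
// declares as [64 x i8] (and then accesses through i32 bitcasts) is emitted here as a
// 16-element u32 array instead, so every load and store can stay a plain 32-bit access.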
Converter::Impl::SizeAlignment Converter::Impl::get_physical_size_for_type(spv::Id type_id)
{
	SizeAlignment res = {};
	if (builder().isScalarType(type_id))
	{
		res.size = builder().getScalarTypeWidth(type_id) / 8;
		res.alignment = res.size;
	}
	else if (builder().isVectorType(type_id))
	{
		res = get_physical_size_for_type(builder().getContainedTypeId(type_id));
		res.size *= builder().getNumComponents(type_id);
	}
	else if (builder().isArrayType(type_id))
	{
		res = get_physical_size_for_type(builder().getContainedTypeId(type_id));
		uint32_t array_size = builder().getNumTypeConstituents(type_id);
		// Alignment is inherited from the constituent; we do scalar block layout here.
		res.size *= array_size;
	}
	else if (builder().isStructType(type_id))
	{
		int num_members = builder().getNumTypeConstituents(type_id);
		for (int i = 0; i < num_members; i++)
		{
			uint32_t member_type_id = builder().getContainedTypeId(type_id, i);
			auto member_res = get_physical_size_for_type(member_type_id);
			res.size = (res.size + member_res.alignment - 1) & ~(member_res.alignment - 1);
			res.size += member_res.size;
			res.alignment = std::max(res.alignment, member_res.alignment);
		}
		res.size = (res.size + res.alignment - 1) & ~(res.alignment - 1);
	}
	else if (builder().isPointerType(type_id))
	{
		res.size = sizeof(uint64_t);
		res.alignment = sizeof(uint64_t);
	}
	return res;
}

void Converter::Impl::decorate_physical_offsets(spv::Id struct_type_id, const Vector<spv::Id> &type_ids)
{
	uint32_t offset = 0;
	int member_index = 0;
	for (auto &type_id : type_ids)
	{
		// DXIL seems to imply scalar alignment for node payloads.
		// It's simple and easy, so just roll with that.
		auto size_alignment = get_physical_size_for_type(type_id);
		assert(size_alignment.size != 0);
		offset = (offset + size_alignment.alignment - 1) & ~(size_alignment.alignment - 1);
		builder().addMemberDecoration(struct_type_id, member_index, spv::DecorationOffset, offset);
		offset += size_alignment.size;
		member_index++;
	}
}
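// Worked example of the scalar block layout computed above, for a hypothetical
// payload struct { uint a; float3 b; double c; }:
//   a: size 4,  align 4 -> offset 0
//   b: size 12, align 4 -> offset 4   (scalar alignment, no vec4 rounding)
//   c: size 8,  align 8 -> offset 16
// The total size is then rounded up to the max member alignment (8) -> 24 bytes.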
spv::Id Converter::Impl::get_struct_type(const Vector<spv::Id> &type_ids, TypeLayoutFlags flags, const char *name)
{
	auto itr = std::find_if(cached_struct_types.begin(), cached_struct_types.end(),
	                        [&](const StructTypeEntry &entry) -> bool {
		                        if (type_ids.size() != entry.subtypes.size())
			                        return false;
		                        if (flags != entry.flags)
			                        return false;
		                        if ((!name && !entry.name.empty()) || (name && entry.name != name))
			                        return false;
		                        for (unsigned i = 0; i < type_ids.size(); i++)
			                        if (type_ids[i] != entry.subtypes[i])
				                        return false;
		                        return true;
	                        });

	if (itr == cached_struct_types.end())
	{
		StructTypeEntry entry;
		entry.subtypes = type_ids;
		entry.name = name ? name : "";

		if ((flags & TYPE_LAYOUT_BLOCK_BIT) != 0)
		{
			constexpr TypeLayoutFlags block_flags =
			    TYPE_LAYOUT_BLOCK_BIT | TYPE_LAYOUT_COHERENT_BIT | TYPE_LAYOUT_READ_ONLY_BIT;
			spv::Id struct_type_id = get_struct_type(type_ids, flags & ~block_flags, entry.name.c_str());
			entry.id = builder().makeStructType({ struct_type_id }, entry.name.c_str());
			builder().addDecoration(entry.id, spv::DecorationBlock);
			builder().addMemberDecoration(entry.id, 0, spv::DecorationOffset, 0);
			if ((flags & TYPE_LAYOUT_COHERENT_BIT) != 0 &&
			    execution_mode_meta.memory_model == spv::MemoryModelGLSL450)
				builder().addMemberDecoration(entry.id, 0, spv::DecorationCoherent);
			if ((flags & TYPE_LAYOUT_READ_ONLY_BIT) != 0)
				builder().addMemberDecoration(entry.id, 0, spv::DecorationNonWritable);
			builder().addMemberName(entry.id, 0, "data");
		}
		else
		{
			entry.id = builder().makeStructType(type_ids, entry.name.c_str());
			if ((flags & TYPE_LAYOUT_PHYSICAL_BIT) != 0)
				decorate_physical_offsets(entry.id, type_ids);
		}

		entry.flags = flags;
		spv::Id id = entry.id;
		cached_struct_types.push_back(std::move(entry));
		return id;
	}
	else
		return itr->id;
}

spv::Id Converter::Impl::get_type_id(DXIL::ComponentType element_type, unsigned rows, unsigned cols, bool force_array)
{
	auto &builder = spirv_module.get_builder();
	spv::Id component_type;

	switch (element_type)
	{
	case DXIL::ComponentType::I1:
		// Cannot have bools in I/O interfaces, these are emitted as 32-bit integers.
		component_type = builder.makeUintType(32);
		break;

	case DXIL::ComponentType::I16:
		component_type = builder.makeIntegerType(16, true);
		break;
	case DXIL::ComponentType::U16:
		component_type = builder.makeIntegerType(16, false);
		break;
	case DXIL::ComponentType::I32:
		component_type = builder.makeIntegerType(32, true);
		break;
	case DXIL::ComponentType::U32:
		component_type = builder.makeIntegerType(32, false);
		break;
	case DXIL::ComponentType::I64:
		component_type = builder.makeIntegerType(64, true);
		break;
	case DXIL::ComponentType::U64:
		component_type = builder.makeIntegerType(64, false);
		break;
	case DXIL::ComponentType::F16:
		component_type = builder.makeFloatType(16);
		break;
	case DXIL::ComponentType::F32:
		component_type = builder.makeFloatType(32);
		break;
	case DXIL::ComponentType::F64:
		component_type = builder.makeFloatType(64);
		break;

	default:
		LOGE("Unknown component type.\n");
		return 0;
	}

	if (cols > 1)
		component_type = builder.makeVectorType(component_type, cols);
	if (rows > 1 || force_array)
		component_type = builder.makeArrayType(component_type, builder.makeUintConstant(rows), 0);
	return component_type;
}

spv::Id Converter::Impl::get_type_id(spv::Id id) const
{
	auto itr = id_to_type.find(id);
	if (itr == id_to_type.end())
		return 0;
	else
		return itr->second;
}

static bool module_is_ident(llvm::Module &module, const char *ident)
{
	auto *ident_meta = module.getNamedMetadata("llvm.ident");
	if (ident_meta)
		if (auto *arg0 = ident_meta->getOperand(0))
			if (auto *str = llvm::dyn_cast<llvm::MDString>(arg0->getOperand(0)))
				if (str->getString().find(ident) != std::string::npos)
					return true;
	return false;
}

static bool module_is_dxilconv(llvm::Module &module)
{
	return module_is_ident(module, "dxbc2dxil");
}

static bool module_is_dxbc_spirv(llvm::Module &module)
{
	return module_is_ident(module, "dxbc-spirv");
}

bool Converter::Impl::emit_patch_variables()
{
	auto *node = entry_point_meta;
	if (!node->getOperand(2))
		return true;

	auto &signature = node->getOperand(2);
	auto *signature_node = llvm::cast<llvm::MDNode>(signature);
	auto &patch_variables = signature_node->getOperand(2);
	if (!patch_variables)
		return true;

	// There are no control points, and there's no explicit parameter, so force 0.
	if (patch_location_offset == ~0u)
		patch_location_offset = 0;

	// dxilconv is broken and emits the patch constant fork phase in a way that is nonsensical.
	// It assumes that you can write outside the bounds of a signature element.
	// To make this work, we need to lower the patch constant variables to Private variables instead.
	bool broken_patch_variables = false;
	if (execution_model == spv::ExecutionModelTessellationControl)
		broken_patch_variables = module_is_dxilconv(bitcode_parser.get_module());

	auto *patch_node = llvm::dyn_cast<llvm::MDNode>(patch_variables);
	auto &builder = spirv_module.get_builder();

	spv::StorageClass storage = execution_model == spv::ExecutionModelTessellationEvaluation ?
	                            spv::StorageClassInput : spv::StorageClassOutput;

	unsigned num_broken_user_rows = 0;

	for (unsigned i = 0; i < patch_node->getNumOperands(); i++)
	{
		auto *patch = llvm::cast<llvm::MDNode>(patch_node->getOperand(i));
		auto element_id = get_constant_metadata(patch, 0);
		auto semantic_name = get_string_metadata(patch, 1);
		auto actual_element_type =
		    normalize_component_type(static_cast<DXIL::ComponentType>(get_constant_metadata(patch, 2)));
		auto effective_element_type = get_effective_input_output_type(actual_element_type);
		auto system_value = static_cast<DXIL::Semantic>(get_constant_metadata(patch, 3));

		unsigned semantic_index = 0;
		if (patch->getOperand(4))
			semantic_index = get_constant_metadata(llvm::cast<llvm::MDNode>(patch->getOperand(4)), 0);

		auto rows = get_constant_metadata(patch, 6);
		auto cols = get_constant_metadata(patch, 7);
		auto start_row = get_constant_metadata(patch, 8);
		auto start_col = get_constant_metadata(patch, 9);

		if (system_value == DXIL::Semantic::TessFactor)
			rows = 4;
		else if (system_value == DXIL::Semantic::InsideTessFactor)
			rows = 2;

		if (broken_patch_variables && system_value == DXIL::Semantic::User)
			num_broken_user_rows = std::max(num_broken_user_rows, start_row + rows);

		auto &meta = patch_elements_meta[element_id];
		meta.semantic = system_value;

		// Handle the case where a shader declares the tess factors twice at different offsets.
		unsigned semantic_offset = 0;
		if (system_value == DXIL::Semantic::TessFactor || system_value == DXIL::Semantic::InsideTessFactor)
		{
			auto builtin = system_value == DXIL::Semantic::TessFactor ?
			               spv::BuiltInTessLevelOuter : spv::BuiltInTessLevelInner;
			if (spirv_module.has_builtin_shader_input(builtin))
			{
				meta = {};
				meta.id = spirv_module.get_builtin_shader_input(builtin);
				meta.component_type = actual_element_type;
				meta.semantic_offset = start_row;
				meta.semantic = system_value;
				continue;
			}
		}

		// The application can emit these in ViewInstancing, in which case it's just an offset.
		if (options.multiview.enable && execution_model == spv::ExecutionModelMeshEXT)
		{
			if (system_value == DXIL::Semantic::RenderTargetArrayIndex)
				multiview.custom_layer_index = true;
			if (system_value == DXIL::Semantic::ViewPortArrayIndex)
				multiview.custom_viewport_index = true;
		}

		spv::Id type_id;
		if (system_value == DXIL::Semantic::CullPrimitive)
			type_id = builder.makeBoolType();
		else
			type_id = get_type_id(effective_element_type, rows, cols);

		if (execution_model == spv::ExecutionModelMeshEXT)
		{
			type_id = builder.makeArrayType(
			    type_id, builder.makeUintConstant(execution_mode_meta.stage_output_num_primitive, false), 0);
		}

		auto variable_name = semantic_name;
		if (semantic_index != 0)
		{
			variable_name += "_";
			variable_name += dxil_spv::to_string(semantic_index);
		}

		spv::Id variable_id = create_variable(storage, type_id, variable_name.c_str());
		meta.id = variable_id;
		meta.component_type = actual_element_type;
		meta.semantic_offset = semantic_offset;
		meta.start_row = start_row;
		meta.start_col = start_col;
		meta.lowering = broken_patch_variables && system_value == DXIL::Semantic::User;

		if (system_value != DXIL::Semantic::User)
		{
			emit_builtin_decoration(variable_id, system_value, storage);
		}
		else
		{
			// Patch constants are packed together with control point variables,
			// so we need to apply an offset to make this work in SPIR-V.
			// The offset is deduced from the control point I/O signature.
			// TODO: If it's possible to omit trailing CP members in a domain shader, we will need
			// to pass this offset into the compiler.
			VulkanStageIO vk_io = { start_row + patch_location_offset, start_col, true };
			if (resource_mapping_iface)
			{
				D3DStageIO d3d_io = { semantic_name.c_str(), semantic_index, start_row, rows };
				if (execution_model == spv::ExecutionModelTessellationEvaluation)
				{
					if (!resource_mapping_iface->remap_stage_input(d3d_io, vk_io))
						return false;
				}
				else if (!resource_mapping_iface->remap_stage_output(d3d_io, vk_io))
					return false;
			}

			builder.addDecoration(variable_id, spv::DecorationLocation, vk_io.location);
			if (vk_io.component != 0)
				builder.addDecoration(variable_id, spv::DecorationComponent, vk_io.component);
		}

		builder.addDecoration(variable_id, execution_model == spv::ExecutionModelMeshEXT ?
		                                   spv::DecorationPerPrimitiveEXT : spv::DecorationPatch);
	}

	if (num_broken_user_rows)
	{
		spv::Id type_id = builder.makeArrayType(builder.makeVectorType(builder.makeUintType(32), 4),
		                                        builder.makeUintConstant(num_broken_user_rows), 0);
		execution_mode_meta.patch_lowering_array_var_id =
		    create_variable_with_initializer(spv::StorageClassPrivate, type_id,
		                                     builder.makeNullConstant(type_id), "PatchLoweringRows");
	}

	return true;
}

bool Converter::Impl::emit_other_variables()
{
	auto &builder = spirv_module.get_builder();

	if (execution_model == spv::ExecutionModelMeshEXT && execution_mode_meta.stage_output_num_primitive)
	{
		unsigned index_dim = execution_mode_meta.primitive_index_dimension;
		if (index_dim)
		{
			spv::Id type_id = builder.makeArrayType(
			    get_type_id(DXIL::ComponentType::U32, 1, index_dim),
			    builder.makeUintConstant(execution_mode_meta.stage_output_num_primitive, false), 0);
			primitive_index_array_id = create_variable(spv::StorageClassOutput, type_id, "indices");
			spv::BuiltIn builtin_id = index_dim == 3 ?
			                          spv::BuiltInPrimitiveTriangleIndicesEXT :
			                          spv::BuiltInPrimitiveLineIndicesEXT;
			builder.addDecoration(primitive_index_array_id, spv::DecorationBuiltIn, builtin_id);
			spirv_module.register_builtin_shader_output(primitive_index_array_id, builtin_id);
		}
	}

	return true;
}

static unsigned get_geometry_shader_stream_index(const llvm::MDNode *node)
{
	if (node->getNumOperands() >= 11 && node->getOperand(10))
	{
		auto *attr = llvm::dyn_cast<llvm::MDNode>(node->getOperand(10));
		if (!attr)
			return 0;

		unsigned num_pairs = attr->getNumOperands() / 2;
		for (unsigned i = 0; i < num_pairs; i++)
		{
			if (static_cast<DXIL::GSStageOutTags>(get_constant_metadata(attr, 2 * i + 0)) ==
			    DXIL::GSStageOutTags::Stream)
				return get_constant_metadata(attr, 2 * i + 1);
		}
	}

	return 0;
}

static void build_geometry_stream_row_offsets(unsigned offsets[4], const llvm::MDNode *outputs_node)
{
	unsigned row_count_for_geometry_stream[4] = {};
	for (unsigned i = 0; i < outputs_node->getNumOperands(); i++)
	{
		auto *output = llvm::cast<llvm::MDNode>(outputs_node->getOperand(i));
		unsigned geometry_stream = get_geometry_shader_stream_index(output);
		if (geometry_stream < 4)
		{
			auto start_row = get_constant_metadata(output, 8);
			auto rows = get_constant_metadata(output, 6);
			auto end_rows = rows + start_row;
			if (end_rows > row_count_for_geometry_stream[geometry_stream])
				row_count_for_geometry_stream[geometry_stream] = end_rows;
		}
	}

	// Prefix sum: each stream starts after all rows consumed by lower-numbered streams.
	for (unsigned row = 0; row < 4; row++)
		for (unsigned i = 0; i < row; i++)
			offsets[row] += row_count_for_geometry_stream[i];
}
bool Converter::Impl::emit_stage_output_variables()
{
	auto *node = entry_point_meta;
	if (!node->getOperand(2))
		return true;

	auto &signature = node->getOperand(2);
	auto *signature_node = llvm::cast<llvm::MDNode>(signature);
	auto &outputs = signature_node->getOperand(1);
	if (!outputs)
		return true;

	auto *outputs_node = llvm::dyn_cast<llvm::MDNode>(outputs);
	auto &builder = spirv_module.get_builder();

	unsigned clip_distance_count = 0;
	unsigned cull_distance_count = 0;

	bool auto_patch_location = patch_location_offset == ~0u &&
	                           (execution_model == spv::ExecutionModelTessellationControl ||
	                            execution_model == spv::ExecutionModelMeshEXT);
	if (auto_patch_location)
		patch_location_offset = 0;

	// If we have multiple geometry streams, we need to hallucinate locations.
	// This is okay since we're not going to support multi-stream rasterization anyway.
	unsigned start_row_for_geometry_stream[4] = {};
	if (execution_model == spv::ExecutionModelGeometry)
		build_geometry_stream_row_offsets(start_row_for_geometry_stream, outputs_node);

	for (unsigned i = 0; i < outputs_node->getNumOperands(); i++)
	{
		auto *output = llvm::cast<llvm::MDNode>(outputs_node->getOperand(i));
		auto element_id = get_constant_metadata(output, 0);
		auto semantic_name = get_string_metadata(output, 1);
		auto actual_element_type =
		    normalize_component_type(static_cast<DXIL::ComponentType>(get_constant_metadata(output, 2)));
		auto effective_element_type = get_effective_input_output_type(actual_element_type);
		auto system_value = static_cast<DXIL::Semantic>(get_constant_metadata(output, 3));

		unsigned semantic_index = 0;
		if (output->getOperand(4))
			semantic_index = get_constant_metadata(llvm::cast<llvm::MDNode>(output->getOperand(4)), 0);

		auto interpolation = static_cast<DXIL::InterpolationMode>(get_constant_metadata(output, 5));
		auto rows = get_constant_metadata(output, 6);
		auto cols = get_constant_metadata(output, 7);
		auto start_row = get_constant_metadata(output, 8);
		auto start_col = get_constant_metadata(output, 9);
		bool masked_output = false;

		if (options.dual_source_blending && start_row >= 2)
		{
			// Mask out writes to unused higher RTs when using dual source blending.
			continue;
		}

		if (auto_patch_location)
			patch_location_offset = std::max(patch_location_offset, start_row + rows);

		spv::Id type_id = get_type_id(effective_element_type, rows, cols);

		if (options.quirks.ignore_primitive_shading_rate && system_value == DXIL::Semantic::ShadingRate)
		{
			masked_output = true;
		}
		else if (execution_model == spv::ExecutionModelTessellationControl ||
		         (execution_model == spv::ExecutionModelTessellationEvaluation &&
		          system_value == DXIL::Semantic::ShadingRate))
		{
			// For HS <-> DS, ignore system values.
			// Shading rate is also ignored in DS. RE4 hits this case. Just treat it as a normal user varying.
			system_value = DXIL::Semantic::User;
		}

		if (system_value == DXIL::Semantic::Position)
		{
			type_id = get_type_id(effective_element_type, rows, 4);
		}
		else if (system_value == DXIL::Semantic::Coverage)
		{
			type_id = builder.makeArrayType(type_id, builder.makeUintConstant(1), 0);
		}
		else if (system_value == DXIL::Semantic::ClipDistance)
		{
			// DX is rather weird here and you can declare clip distance either as a vector or array, or both!
			output_clip_cull_meta[element_id] = { clip_distance_count, cols, spv::BuiltInClipDistance };
			output_elements_meta[element_id] = { 0, actual_element_type, 0, system_value };
			clip_distance_count += rows * cols;
			continue;
		}
		else if (system_value == DXIL::Semantic::CullDistance)
		{
			// Same story for cull distance.
			output_clip_cull_meta[element_id] = { cull_distance_count, cols, spv::BuiltInCullDistance };
			output_elements_meta[element_id] = { 0, actual_element_type, 0, system_value };
			cull_distance_count += rows * cols;
			continue;
		}

		// The application can emit these in ViewInstancing, in which case it's just an offset.
		if (options.multiview.enable)
		{
			if (system_value == DXIL::Semantic::RenderTargetArrayIndex)
				multiview.custom_layer_index = true;
			if (system_value == DXIL::Semantic::ViewPortArrayIndex)
				multiview.custom_viewport_index = true;
		}

		if (execution_model == spv::ExecutionModelTessellationControl ||
		    execution_model == spv::ExecutionModelMeshEXT)
		{
			type_id = builder.makeArrayType(
			    type_id, builder.makeUintConstant(execution_mode_meta.stage_output_num_vertex, false), 0);
		}

		auto variable_name = semantic_name;
		if (semantic_index != 0)
		{
			variable_name += "_";
			variable_name += dxil_spv::to_string(semantic_index);
		}

		spv::Id variable_id = create_variable(
		    masked_output ? spv::StorageClassPrivate : spv::StorageClassOutput, type_id, variable_name.c_str());
		output_elements_meta[element_id] = { variable_id, actual_element_type, 0, system_value };

		if (effective_element_type != actual_element_type && component_type_is_16bit(actual_element_type))
			builder.addDecoration(variable_id, spv::DecorationRelaxedPrecision);

		if (execution_model == spv::ExecutionModelVertex || execution_model == spv::ExecutionModelGeometry ||
		    execution_model == spv::ExecutionModelTessellationEvaluation)
		{
			if (resource_mapping_iface)
			{
				VulkanStreamOutput vk_output = {};
				if (!resource_mapping_iface->remap_stream_output({ semantic_name.c_str(), semantic_index },
				                                                 vk_output))
					return false;

				if (vk_output.enable)
				{
					builder.addCapability(spv::CapabilityTransformFeedback);
					builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeXfb);
					builder.addDecoration(variable_id, spv::DecorationOffset, vk_output.offset);
					builder.addDecoration(variable_id, spv::DecorationXfbStride, vk_output.stride);
					builder.addDecoration(variable_id, spv::DecorationXfbBuffer, vk_output.buffer_index);
				}
			}
		}

		unsigned geometry_stream = 0;
		if (execution_model == spv::ExecutionModelGeometry)
		{
			geometry_stream = get_geometry_shader_stream_index(output);
			if (geometry_stream != 0)
			{
				builder.addCapability(spv::CapabilityGeometryStreams);
				builder.addDecoration(variable_id, spv::DecorationStream, geometry_stream);
			}
		}

		if (system_value == DXIL::Semantic::Target)
		{
			if (options.dual_source_blending)
			{
				assert(start_row == 0 || start_row == 1);
				if (rows != 1)
				{
					LOGE("For dual source blending, number of rows must be 1.\n");
					return false;
				}
				builder.addDecoration(variable_id, spv::DecorationLocation, 0);
				builder.addDecoration(variable_id, spv::DecorationIndex, start_row);
				output_elements_meta[element_id].semantic_offset = 0;
			}
			else
			{
				builder.addDecoration(variable_id, spv::DecorationLocation, start_row);
				output_elements_meta[element_id].semantic_offset = start_row;
			}

			if (start_col != 0)
				builder.addDecoration(variable_id, spv::DecorationComponent, start_col);
		}
		else if (system_value != DXIL::Semantic::User)
		{
			emit_builtin_decoration(variable_id, system_value, spv::StorageClassOutput);
		}
		else
		{
			if (execution_model == spv::ExecutionModelVertex ||
			    execution_model == spv::ExecutionModelTessellationEvaluation ||
			    execution_model == spv::ExecutionModelGeometry ||
			    execution_model == spv::ExecutionModelMeshEXT)
			{
				emit_interpolation_decorations(variable_id, interpolation);
			}

			VulkanStageIO vk_output = { start_row, start_col };
			if (execution_model == spv::ExecutionModelGeometry && geometry_stream < 4)
				vk_output.location += start_row_for_geometry_stream[geometry_stream];

			if (resource_mapping_iface)
			{
				D3DStageIO d3d_output = { semantic_name.c_str(), semantic_index, start_row, rows };
				if (!resource_mapping_iface->remap_stage_output(d3d_output, vk_output))
					return false;
			}

			builder.addDecoration(variable_id, spv::DecorationLocation, vk_output.location);
			if (vk_output.component != 0)
				builder.addDecoration(variable_id, spv::DecorationComponent, vk_output.component);
		}
	}

	if (clip_distance_count)
	{
		spv::Id type_id = get_type_id(DXIL::ComponentType::F32, clip_distance_count, 1, true);
		if (execution_model == spv::ExecutionModelTessellationControl ||
		    execution_model == spv::ExecutionModelMeshEXT)
		{
			type_id = builder.makeArrayType(
			    type_id, builder.makeUintConstant(execution_mode_meta.stage_output_num_vertex, false), 0);
		}
		spv::Id variable_id = create_variable(spv::StorageClassOutput, type_id);
		emit_builtin_decoration(variable_id, DXIL::Semantic::ClipDistance, spv::StorageClassOutput);
		spirv_module.register_builtin_shader_output(variable_id, spv::BuiltInClipDistance);
	}

	if (cull_distance_count)
	{
		spv::Id type_id = get_type_id(DXIL::ComponentType::F32, cull_distance_count, 1, true);
		if (execution_model == spv::ExecutionModelTessellationControl ||
		    execution_model == spv::ExecutionModelMeshEXT)
		{
			type_id = builder.makeArrayType(
			    type_id, builder.makeUintConstant(execution_mode_meta.stage_output_num_vertex, false), 0);
		}
		spv::Id variable_id = create_variable(spv::StorageClassOutput, type_id);
		emit_builtin_decoration(variable_id, DXIL::Semantic::CullDistance, spv::StorageClassOutput);
		spirv_module.register_builtin_shader_output(variable_id, spv::BuiltInCullDistance);
	}

	return true;
}
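// Example of the clip/cull packing above (hypothetical signature): declaring
// SV_ClipDistance0 as float2 and SV_ClipDistance1 as float gives clip_distance_count = 3;
// both elements are later stored through one ClipDistance[3] builtin array using the
// offsets recorded in output_clip_cull_meta (0 and 2 respectively).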
		if (mode == DXIL::InterpolationMode::LinearNoperspective)
			mode = DXIL::InterpolationMode::Linear;
		else if (mode == DXIL::InterpolationMode::LinearNoperspectiveCentroid)
			mode = DXIL::InterpolationMode::LinearCentroid;
		else if (mode == DXIL::InterpolationMode::LinearNoperspectiveSample)
			mode = DXIL::InterpolationMode::LinearSample;
		emit_interpolation_decorations(variable_id, mode);
		break;

	default:
		break;
	}
}

void Converter::Impl::emit_interpolation_decorations(spv::Id variable_id, DXIL::InterpolationMode mode)
{
	auto &builder = spirv_module.get_builder();
	switch (mode)
	{
	case DXIL::InterpolationMode::Constant:
		builder.addDecoration(variable_id, spv::DecorationFlat);
		break;

	case DXIL::InterpolationMode::LinearCentroid:
		builder.addDecoration(variable_id, spv::DecorationCentroid);
		break;

	case DXIL::InterpolationMode::LinearSample:
		builder.addDecoration(variable_id, spv::DecorationSample);
		builder.addCapability(spv::CapabilitySampleRateShading);
		execution_mode_meta.per_sample_shading = true;
		break;

	case DXIL::InterpolationMode::LinearNoperspective:
		builder.addDecoration(variable_id, spv::DecorationNoPerspective);
		break;

	case DXIL::InterpolationMode::LinearNoperspectiveCentroid:
		builder.addDecoration(variable_id, spv::DecorationNoPerspective);
		builder.addDecoration(variable_id, spv::DecorationCentroid);
		break;

	case DXIL::InterpolationMode::LinearNoperspectiveSample:
		builder.addDecoration(variable_id, spv::DecorationNoPerspective);
		builder.addDecoration(variable_id, spv::DecorationSample);
		builder.addCapability(spv::CapabilitySampleRateShading);
		execution_mode_meta.per_sample_shading = true;
		break;

	default:
		break;
	}
}

void Converter::Impl::emit_builtin_decoration(spv::Id id, DXIL::Semantic semantic, spv::StorageClass storage)
{
	auto &builder = spirv_module.get_builder();
	bool requires_flat_input = false;

	switch (semantic)
	{
	case DXIL::Semantic::Position:
		if (execution_model == spv::ExecutionModelFragment)
		{
			builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInFragCoord);
			spirv_module.register_builtin_shader_input(id, spv::BuiltInFragCoord);
		}
		else if (storage == spv::StorageClassInput)
		{
			builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInPosition);
			spirv_module.register_builtin_shader_input(id, spv::BuiltInPosition);
		}
		else if (storage == spv::StorageClassOutput)
		{
			builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInPosition);
			spirv_module.register_builtin_shader_output(id, spv::BuiltInPosition);
			if (options.invariant_position)
				builder.addDecoration(id, spv::DecorationInvariant);
		}
		break;

	case DXIL::Semantic::SampleIndex:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInSampleId);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInSampleId);
		builder.addCapability(spv::CapabilitySampleRateShading);
		execution_mode_meta.per_sample_shading = true;
		requires_flat_input = true;
		break;

	case DXIL::Semantic::VertexID:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInVertexIndex);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInVertexIndex);
		break;

	case DXIL::Semantic::InstanceID:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInInstanceIndex);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInInstanceIndex);
		break;

	case DXIL::Semantic::InsideTessFactor:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInTessLevelInner);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInTessLevelInner);
		break;

	case DXIL::Semantic::TessFactor:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInTessLevelOuter);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInTessLevelOuter);
		break;

	case DXIL::Semantic::Coverage:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInSampleMask);
		spirv_module.register_builtin_shader_output(id, spv::BuiltInSampleMask);
		break;

	case DXIL::Semantic::Depth:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInFragDepth);
		builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDepthReplacing);
		spirv_module.register_builtin_shader_output(id, spv::BuiltInFragDepth);
		break;

	case DXIL::Semantic::StencilRef:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInFragStencilRefEXT);
		builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeStencilRefReplacingEXT);
		builder.addExtension("SPV_EXT_shader_stencil_export");
		builder.addCapability(spv::CapabilityStencilExportEXT);
		spirv_module.register_builtin_shader_output(id, spv::BuiltInFragStencilRefEXT);
		break;

	case DXIL::Semantic::DepthLessEqual:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInFragDepth);
		builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDepthReplacing);
		builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDepthLess);
		spirv_module.register_builtin_shader_output(id, spv::BuiltInFragDepth);
		break;

	case DXIL::Semantic::DepthGreaterEqual:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInFragDepth);
		builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDepthReplacing);
		builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDepthGreater);
		spirv_module.register_builtin_shader_output(id, spv::BuiltInFragDepth);
		break;

	case DXIL::Semantic::IsFrontFace:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInFrontFacing);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInFrontFacing);
		break;

	case DXIL::Semantic::ClipDistance:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInClipDistance);
		builder.addCapability(spv::CapabilityClipDistance);
		if (storage == spv::StorageClassOutput)
			spirv_module.register_builtin_shader_output(id, spv::BuiltInClipDistance);
		else if (storage == spv::StorageClassInput)
			spirv_module.register_builtin_shader_input(id, spv::BuiltInClipDistance);
		break;

	case DXIL::Semantic::CullDistance:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInCullDistance);
		builder.addCapability(spv::CapabilityCullDistance);
		if (storage == spv::StorageClassOutput)
			spirv_module.register_builtin_shader_output(id, spv::BuiltInCullDistance);
		else if (storage == spv::StorageClassInput)
			spirv_module.register_builtin_shader_input(id, spv::BuiltInCullDistance);
		break;

	case DXIL::Semantic::RenderTargetArrayIndex:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInLayer);
		if (storage == spv::StorageClassOutput)
		{
			spirv_module.register_builtin_shader_output(id, spv::BuiltInLayer);
			if (execution_model != spv::ExecutionModelGeometry)
			{
				builder.addExtension("SPV_EXT_shader_viewport_index_layer");
				builder.addCapability(spv::CapabilityShaderViewportIndexLayerEXT);
			}
		}
		else
		{
			spirv_module.register_builtin_shader_input(id, spv::BuiltInLayer);
			requires_flat_input = true;
		}
		builder.addCapability(spv::CapabilityGeometry);
		break;

	case DXIL::Semantic::ViewPortArrayIndex:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInViewportIndex);
		if (storage == spv::StorageClassOutput)
		{
			spirv_module.register_builtin_shader_output(id, spv::BuiltInViewportIndex);
			if (execution_model != spv::ExecutionModelGeometry)
			{
				builder.addExtension("SPV_EXT_shader_viewport_index_layer");
				builder.addCapability(spv::CapabilityShaderViewportIndexLayerEXT);
			}
		}
		else
		{
			spirv_module.register_builtin_shader_input(id, spv::BuiltInViewportIndex);
			requires_flat_input = true;
		}
		builder.addCapability(spv::CapabilityMultiViewport);
		break;

	case DXIL::Semantic::PrimitiveID:
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInPrimitiveId);
		if (storage == spv::StorageClassOutput)
			spirv_module.register_builtin_shader_output(id, spv::BuiltInPrimitiveId);
		else
		{
			spirv_module.register_builtin_shader_input(id, spv::BuiltInPrimitiveId);
			requires_flat_input = true;
		}
		builder.addCapability(spv::CapabilityGeometry);
		break;

	case DXIL::Semantic::ShadingRate:
		if (storage == spv::StorageClassOutput)
		{
			if (!options.quirks.ignore_primitive_shading_rate)
				builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInPrimitiveShadingRateKHR);
			spirv_module.register_builtin_shader_output(id, spv::BuiltInPrimitiveShadingRateKHR);
		}
		else
		{
			if (!options.quirks.ignore_primitive_shading_rate)
			{
				builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInShadingRateKHR);
				requires_flat_input = true;
			}
			spirv_module.register_builtin_shader_input(id, spv::BuiltInShadingRateKHR);
		}
		builder.addExtension("SPV_KHR_fragment_shading_rate");
		builder.addCapability(spv::CapabilityFragmentShadingRateKHR);
		break;

	case DXIL::Semantic::Barycentrics:
	case DXIL::Semantic::InternalBarycentricsNoPerspective:
	{
		if (options.khr_barycentrics_enabled)
		{
			auto builtin = semantic == DXIL::Semantic::Barycentrics ?
			               spv::BuiltInBaryCoordKHR : spv::BuiltInBaryCoordNoPerspKHR;
			builder.addExtension("SPV_KHR_fragment_shader_barycentric");
			builder.addCapability(spv::CapabilityFragmentBarycentricKHR);
			builder.addDecoration(id, spv::DecorationBuiltIn, builtin);
			spirv_module.register_builtin_shader_input(id, builtin);
		}
		else
		{
			// TODO: We're not dealing with centroid vs per-sample decorations here.
			auto builtin = semantic == DXIL::Semantic::Barycentrics ?
			               spv::BuiltInBaryCoordSmoothAMD : spv::BuiltInBaryCoordNoPerspAMD;
			builder.addExtension("SPV_AMD_shader_explicit_vertex_parameter");
			builder.addDecoration(id, spv::DecorationBuiltIn, builtin);
			spirv_module.register_builtin_shader_input(id, builtin);
		}
		break;
	}

	case DXIL::Semantic::CullPrimitive:
	{
		builder.addExtension("SPV_EXT_mesh_shader");
		builder.addCapability(spv::CapabilityMeshShadingEXT);
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInCullPrimitiveEXT);
		spirv_module.register_builtin_shader_output(id, spv::BuiltInCullPrimitiveEXT);
		break;
	}

	case DXIL::Semantic::DomainLocation:
		// This is normally an opcode in DXIL, but custom IR likes it to be a semantic,
		// and it's easier to just treat it like a normal builtin input.
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInTessCoord);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInTessCoord);
		break;

	case DXIL::Semantic::DispatchThreadID:
		// This is normally an opcode in DXIL, but custom IR likes it to be a semantic.
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInGlobalInvocationId);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInGlobalInvocationId);
		break;

	case DXIL::Semantic::GroupThreadID:
		// This is normally an opcode in DXIL, but custom IR likes it to be a semantic.
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInLocalInvocationId);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInLocalInvocationId);
		break;

	case DXIL::Semantic::GroupID:
		// This is normally an opcode in DXIL, but custom IR likes it to be a semantic.
		builder.addDecoration(id, spv::DecorationBuiltIn, spv::BuiltInWorkgroupId);
		spirv_module.register_builtin_shader_input(id, spv::BuiltInWorkgroupId);
		break;

	default:
		LOGE("Unknown DXIL semantic.\n");
		break;
	}

	// VUID-StandaloneSpirv-Flat-04744
	if (requires_flat_input && execution_model == spv::ExecutionModelFragment)
		builder.addDecoration(id, spv::DecorationFlat);
}

static bool execution_model_has_incoming_payload(spv::ExecutionModel model)
{
	return model != spv::ExecutionModelRayGenerationKHR && execution_model_is_ray_tracing(model);
}

static bool execution_model_has_hit_attribute(spv::ExecutionModel model)
{
	switch (model)
	{
	case spv::ExecutionModelAnyHitKHR:
	case spv::ExecutionModelClosestHitKHR:
	case spv::ExecutionModelIntersectionKHR:
		return true;

	default:
		return false;
	}
}

bool Converter::Impl::emit_incoming_payload()
{
	auto *func = get_entry_point_function(entry_point_meta);

	// The first argument to a RT entry point is always a pointer to payload.
	if (func->arg_end() - func->arg_begin() >= 1)
	{
		auto &arg = *func->arg_begin();
		if (!llvm::isa<llvm::PointerType>(arg.getType()))
			return false;
		auto *elem_type = arg.getType()->getPointerElementType();

		spv::StorageClass storage;
		if (execution_model == spv::ExecutionModelCallableKHR)
			storage = spv::StorageClassIncomingCallableDataKHR;
		else
			storage = spv::StorageClassIncomingRayPayloadKHR;

		// This is a POD. We'll emit that as a block containing the payload type.
		spv::Id payload_var = create_variable(storage, get_type_id(elem_type), "payload");
		handle_to_storage_class[&arg] = storage;
		rewrite_value(&arg, payload_var);
	}

	return true;
}

bool Converter::Impl::emit_hit_attribute()
{
	auto *func = get_entry_point_function(entry_point_meta);

	// The second argument to a RT entry point is always a pointer to hit attribute.
	if (func->arg_end() - func->arg_begin() >= 2)
	{
		auto args = func->arg_begin();
		++args;
		auto &arg = *args;
		if (!llvm::isa<llvm::PointerType>(arg.getType()))
			return false;
		auto *elem_type = arg.getType()->getPointerElementType();

		spv::Id hit_attribute_var = create_variable(spv::StorageClassHitAttributeKHR, get_type_id(elem_type), "hit");
		handle_to_storage_class[&arg] = spv::StorageClassHitAttributeKHR;
		rewrite_value(&arg, hit_attribute_var);
	}
	else if (execution_model == spv::ExecutionModelIntersectionKHR && llvm_hit_attribute_output_type)
	{
		auto *elem_type = llvm_hit_attribute_output_type->getPointerElementType();
		llvm_hit_attribute_output_value = create_variable(spv::StorageClassHitAttributeKHR, get_type_id(elem_type), "hit");
	}

	return true;
}

bool Converter::Impl::emit_global_variables()
{
	auto &module = bitcode_parser.get_module();

	if (execution_model_has_incoming_payload(execution_model))
		if (!emit_incoming_payload())
			return false;

	if (execution_model_has_hit_attribute(execution_model))
		if (!emit_hit_attribute())
			return false;

	for (auto itr = module.global_begin(); itr != module.global_end(); ++itr)
	{
		llvm::GlobalVariable &global = *itr;
		auto address_space = static_cast<DXIL::AddressSpace>(global.getType()->getAddressSpace());

		// Workarounds for DXR. RT resources tend to be declared with external linkage + structs.
		// Groupshared is also declared with external linkage, even if that is bogus.
		// Make sure we declare global internal struct LUTs at the very least ...
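		// In other words (summary of the checks below): only internal-linkage globals and
		// groupshared variables survive to become SPIR-V variables; other external-linkage
		// globals are assumed to be resource declarations and are skipped.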
		if (global.getLinkage() == llvm::GlobalVariable::ExternalLinkage &&
		    address_space != DXIL::AddressSpace::GroupShared)
		{
			continue;
		}

		// Ignore @llvm.global_ctors(). Only observed once with dummy ctor.
		// It probably is not intended to work.
		if (global.getLinkage() == llvm::GlobalVariable::AppendingLinkage)
			continue;

		spv::Id pointee_type_id = 0;
		spv::Id scalar_type_id = 0;
		bool padded_composite = false;
		bool complex_composite = false;

		if (address_space == DXIL::AddressSpace::Thread &&
		    options.extended_robustness.constant_lut &&
		    global.hasInitializer() && global.isConstant())
		{
			if (auto *array_type = llvm::dyn_cast<llvm::ArrayType>(global.getType()->getPointerElementType()))
			{
				scalar_type_id = get_type_id(array_type->getArrayElementType());
				pointee_type_id = builder().makeArrayType(
				    scalar_type_id, builder().makeUintConstant(array_type->getArrayNumElements() + 1), false);
				padded_composite = true;
			}
		}
		else if (address_space == DXIL::AddressSpace::GroupShared && shader_analysis.require_wmma)
		{
			// Workaround for bugged WMMA shaders.
			// The shaders rely on AMD aligning LDS size to 512 bytes.
			// This avoids overflow spilling into LDSTranspose area by mistake, which breaks some shaders.
			if (auto *array_type = llvm::dyn_cast<llvm::ArrayType>(global.getType()->getPointerElementType()))
			{
				scalar_type_id = get_type_id(array_type->getArrayElementType());
				uint32_t elem_count = array_type->getArrayNumElements();
				uint32_t alignment = (512 * 8) / array_type->getArrayElementType()->getIntegerBitWidth();
				elem_count = (elem_count + alignment - 1) & ~(alignment - 1);
				pointee_type_id = builder().makeArrayType(
				    scalar_type_id, builder().makeUintConstant(elem_count), false);
			}
		}
		else if (shader_analysis.require_wmma)
		{
			if (ags_alloca_or_global_filter(*this, &global, pointee_type_id))
				complex_composite = true;
		}

		if (!pointee_type_id)
			pointee_type_id = get_type_id(global.getType()->getPointerElementType());

		// Happens for some global variables in DXR for some reason, benign.
		if (pointee_type_id == 0)
			continue;

		spv::Id initializer_id = 0;
		llvm::Constant *initializer = nullptr;
		if (global.hasInitializer())
			initializer = global.getInitializer();
		if (initializer && llvm::isa<llvm::UndefValue>(initializer))
			initializer = nullptr;

		if (address_space == DXIL::AddressSpace::GroupShared)
		{
			if (initializer)
			{
				// FIXME: Is this even legal DXIL?
				LOGW("Global variable address space cannot have initializer! Ignoring ...\n");
				initializer = nullptr;
			}
		}

		if (initializer)
		{
			if (complex_composite)
			{
				if (!llvm::isa<llvm::ConstantAggregateZero>(initializer))
				{
					LOGE("WMMA initializer must be all zero.\n");
					return false;
				}
				initializer_id = builder().makeNullConstant(pointee_type_id);
			}
			else if (padded_composite)
				initializer_id = get_padded_constant_array(pointee_type_id, initializer);
			else
				initializer_id = get_id_for_constant(initializer, 0);
		}

		spv::StorageClass storage_class = address_space == DXIL::AddressSpace::GroupShared ?
		                                  spv::StorageClassWorkgroup : spv::StorageClassPrivate;
		spv::Id var_id = create_variable_with_initializer(
		    get_effective_storage_class(&global, storage_class), pointee_type_id, initializer_id);

		decorate_relaxed_precision(global.getType()->getPointerElementType(), var_id, false);
		rewrite_value(&global, var_id);
	}

	return true;
}

static void adjust_system_value(DXIL::Semantic &semantic, DXIL::InterpolationMode &interpolation)
{
	if (semantic == DXIL::Semantic::Barycentrics)
	{
		switch (interpolation)
		{
		case DXIL::InterpolationMode::LinearNoperspective:
			semantic = DXIL::Semantic::InternalBarycentricsNoPerspective;
			interpolation = DXIL::InterpolationMode::Linear;
			break;

		case DXIL::InterpolationMode::LinearNoperspectiveCentroid:
			semantic = DXIL::Semantic::InternalBarycentricsNoPerspective;
			interpolation = DXIL::InterpolationMode::LinearCentroid;
			break;

		case DXIL::InterpolationMode::LinearNoperspectiveSample:
			semantic = DXIL::Semantic::InternalBarycentricsNoPerspective;
			interpolation = DXIL::InterpolationMode::LinearSample;
			break;

		default:
			break;
		}
	}
}

bool Converter::Impl::emit_stage_input_variables()
{
	auto *node = entry_point_meta;
	if (!node->getOperand(2))
		return true;

	auto &signature = node->getOperand(2);
	auto *signature_node = llvm::cast<llvm::MDNode>(signature);
	auto &inputs = signature_node->getOperand(0);
	if (!inputs)
		return true;

	bool stage_arrayed_inputs =
	    execution_model == spv::ExecutionModelGeometry ||
	    execution_model == spv::ExecutionModelTessellationControl ||
	    execution_model == spv::ExecutionModelTessellationEvaluation;

	uint32_t stage_input_vertices = execution_mode_meta.stage_input_num_vertex;
	if (execution_model == spv::ExecutionModelTessellationControl)
	{
		// The control point input arrays are effectively unsized. We have to give it something, so use upper bound.
		constexpr uint32_t MaxControlPoints = 32;
		stage_input_vertices = MaxControlPoints;
	}

	auto *inputs_node = llvm::dyn_cast<llvm::MDNode>(inputs);
	auto &builder = spirv_module.get_builder();

	unsigned clip_distance_count = 0;
	unsigned cull_distance_count = 0;

	bool auto_patch_location = patch_location_offset == ~0u &&
	                           execution_model == spv::ExecutionModelTessellationEvaluation;
	if (auto_patch_location)
		patch_location_offset = 0;

	for (unsigned i = 0; i < inputs_node->getNumOperands(); i++)
	{
		bool arrayed_input = stage_arrayed_inputs;
		auto *input = llvm::cast<llvm::MDNode>(inputs_node->getOperand(i));
		auto element_id = get_constant_metadata(input, 0);
		auto semantic_name = get_string_metadata(input, 1);
		auto actual_element_type =
		    normalize_component_type(static_cast<DXIL::ComponentType>(get_constant_metadata(input, 2)));
		auto effective_element_type = get_effective_input_output_type(actual_element_type);
		auto system_value = static_cast<DXIL::Semantic>(get_constant_metadata(input, 3));

		unsigned semantic_index = 0;
		if (input->getOperand(4))
			semantic_index = get_constant_metadata(llvm::cast<llvm::MDNode>(input->getOperand(4)), 0);

		auto interpolation = static_cast<DXIL::InterpolationMode>(get_constant_metadata(input, 5));
		adjust_system_value(system_value, interpolation);

		auto rows = get_constant_metadata(input, 6);
		auto cols = get_constant_metadata(input, 7);
		auto start_row = get_constant_metadata(input, 8);
		auto start_col = get_constant_metadata(input, 9);

		if (auto_patch_location)
			patch_location_offset = std::max(patch_location_offset, start_row + rows);

		// For HS <-> DS, ignore system values.
		// Allow certain system values that are synthesized however.
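		// E.g. SV_Position written by HS arrives in DS as a plain Location-decorated input;
		// only SV_DomainLocation is kept special, since it maps to BuiltInTessCoord.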
		if (execution_model == spv::ExecutionModelTessellationEvaluation &&
		    system_value != DXIL::Semantic::DomainLocation)
			system_value = DXIL::Semantic::User;

		bool masked_input = false;
		if (system_value == DXIL::Semantic::ShadingRate && options.quirks.ignore_primitive_shading_rate)
			masked_input = true;

		if (!options.khr_barycentrics_enabled)
		{
			if (system_value == DXIL::Semantic::Barycentrics ||
			    system_value == DXIL::Semantic::InternalBarycentricsNoPerspective)
			{
				cols = 2;
			}
		}

		spv::Id type_id = get_type_id(effective_element_type, rows, cols);

		if (system_value == DXIL::Semantic::Position)
		{
			type_id = get_type_id(effective_element_type, rows, 4);
		}
		else if (system_value == DXIL::Semantic::IsFrontFace)
		{
			// Need to cast this to uint when loading the semantic input.
			type_id = builder.makeBoolType();
		}
		else if (system_value == DXIL::Semantic::ClipDistance)
		{
			// DX is rather weird here and you can declare clip distance either as a vector or array, or both!
			input_clip_cull_meta[element_id] = { clip_distance_count, cols, spv::BuiltInClipDistance };
			input_elements_meta[element_id] = { 0, actual_element_type, 0, system_value };
			clip_distance_count += rows * cols;
			continue;
		}
		else if (system_value == DXIL::Semantic::CullDistance)
		{
			// DX is rather weird here and you can declare clip distance either as a vector or array, or both!
			input_clip_cull_meta[element_id] = { cull_distance_count, cols, spv::BuiltInCullDistance };
			input_elements_meta[element_id] = { 0, actual_element_type, 0, system_value };
			cull_distance_count += rows * cols;
			continue;
		}
		else if (system_value == DXIL::Semantic::PrimitiveID ||
		         system_value == DXIL::Semantic::ShadingRate ||
		         system_value == DXIL::Semantic::DomainLocation)
		{
			arrayed_input = false;
		}

		bool per_vertex = llvm_attribute_at_vertex_indices.count(element_id) != 0;

		if (arrayed_input)
		{
			type_id = builder.makeArrayType(type_id, builder.makeUintConstant(stage_input_vertices), 0);
		}
		else if (per_vertex && options.khr_barycentrics_enabled)
		{
			// TODO: Does this change for barycentrics with lines?
			type_id = builder.makeArrayType(type_id, builder.makeUintConstant(3), 0);
			// Default. We should emit PerVertex instead of flat. Linear here is the default, don't emit anything.
			interpolation = DXIL::InterpolationMode::Linear;
		}

		auto variable_name = semantic_name;
		if (semantic_index != 0)
		{
			variable_name += "_";
			variable_name += dxil_spv::to_string(semantic_index);
		}

		spv::Id variable_id = create_variable(masked_input ? spv::StorageClassPrivate : spv::StorageClassInput,
		                                      type_id, variable_name.c_str());
		input_elements_meta[element_id] = { variable_id, actual_element_type,
		                                    system_value != DXIL::Semantic::User ? start_row : 0, system_value };

		if (per_vertex)
		{
			if (options.khr_barycentrics_enabled)
			{
				builder.addExtension("SPV_KHR_fragment_shader_barycentric");
				builder.addCapability(spv::CapabilityFragmentBarycentricKHR);
				builder.addDecoration(variable_id, spv::DecorationPerVertexKHR);
			}
			else
			{
				builder.addExtension("SPV_AMD_shader_explicit_vertex_parameter");
				builder.addDecoration(variable_id, spv::DecorationExplicitInterpAMD);
			}
		}

		if (effective_element_type != actual_element_type && component_type_is_16bit(actual_element_type))
			builder.addDecoration(variable_id, spv::DecorationRelaxedPrecision);

		if (system_value != DXIL::Semantic::User)
		{
			emit_builtin_decoration(variable_id, system_value, spv::StorageClassInput);
			if (execution_model == spv::ExecutionModelFragment)
				emit_builtin_interpolation_decorations(variable_id, system_value, interpolation);
		}
		else
		{
			if (execution_model == spv::ExecutionModelFragment)
				emit_interpolation_decorations(variable_id, interpolation);

			VulkanStageIO vk_input = { start_row, start_col };
			if (resource_mapping_iface)
			{
				D3DStageIO d3d_input = { semantic_name.c_str(), semantic_index, start_row, rows };
				if (execution_model == spv::ExecutionModelVertex)
				{
					if (!resource_mapping_iface->remap_vertex_input(d3d_input, vk_input))
						return false;
				}
				if (!resource_mapping_iface->remap_stage_input(d3d_input, vk_input))
					return false;
			}

			builder.addDecoration(variable_id, spv::DecorationLocation, vk_input.location);
			if (execution_model != spv::ExecutionModelVertex && vk_input.component != 0)
				builder.addDecoration(variable_id, spv::DecorationComponent, vk_input.component);

			if (execution_model == spv::ExecutionModelFragment && (vk_input.flags & STAGE_IO_PER_PRIMITIVE))
			{
				builder.addDecoration(variable_id, spv::DecorationPerPrimitiveEXT);
				builder.addExtension("SPV_EXT_mesh_shader");
				builder.addCapability(spv::CapabilityMeshShadingEXT);
			}
		}
	}

	if (clip_distance_count)
	{
		spv::Id type_id = get_type_id(DXIL::ComponentType::F32, clip_distance_count, 1, true);
		if (stage_arrayed_inputs)
		{
			type_id = builder.makeArrayType(
			    type_id, builder.makeUintConstant(stage_input_vertices, false), 0);
		}
		spv::Id variable_id = create_variable(spv::StorageClassInput, type_id);
		emit_builtin_decoration(variable_id, DXIL::Semantic::ClipDistance, spv::StorageClassInput);
		spirv_module.register_builtin_shader_input(variable_id, spv::BuiltInClipDistance);
	}

	if (cull_distance_count)
	{
		spv::Id type_id = get_type_id(DXIL::ComponentType::F32, cull_distance_count, 1, true);
		if (stage_arrayed_inputs)
		{
			type_id = builder.makeArrayType(
			    type_id, builder.makeUintConstant(stage_input_vertices, false), 0);
		}
		spv::Id variable_id = create_variable(spv::StorageClassInput, type_id);
		emit_builtin_decoration(variable_id, DXIL::Semantic::CullDistance, spv::StorageClassInput);
		spirv_module.register_builtin_shader_input(variable_id, spv::BuiltInCullDistance);
	}

	return true;
}

spv::Id Converter::Impl::build_sampled_image(spv::Id image_id, spv::Id sampler_id, bool comparison)
{
	bool is_non_uniform = handle_to_resource_meta[image_id].non_uniform ||
	                      handle_to_resource_meta[sampler_id].non_uniform;

	auto itr = std::find_if(combined_image_sampler_cache.begin(), combined_image_sampler_cache.end(),
	                        [&](const CombinedImageSampler &combined) {
		                        return combined.image_id == image_id && combined.sampler_id == sampler_id &&
		                               combined.non_uniform == is_non_uniform;
	                        });

	if (itr != combined_image_sampler_cache.end())
		return itr->combined_id;

	auto &builder = spirv_module.get_builder();
	spv::Id image_type_id = get_type_id(image_id);
	spv::Dim dim = builder.getTypeDimensionality(image_type_id);
	bool arrayed = builder.isArrayedImageType(image_type_id);
	bool multisampled = builder.isMultisampledImageType(image_type_id);
	spv::Id sampled_format = builder.getImageComponentType(image_type_id);

	image_type_id = builder.makeImageType(sampled_format, dim, comparison, arrayed, multisampled, 1,
	                                      spv::ImageFormatUnknown);

	Operation *op = allocate(spv::OpSampledImage, builder.makeSampledImageType(image_type_id));
	op->add_ids({ image_id, sampler_id });
	add(op);

	if (is_non_uniform)
	{
		builder.addDecoration(op->id, spv::DecorationNonUniformEXT);
		op->flags |= Operation::SinkableBit;
	}

	combined_image_sampler_cache.push_back({ image_id, sampler_id, op->id, is_non_uniform });
	return op->id;
}

spv::Id Converter::Impl::build_vector_type(spv::Id element_type, unsigned count)
{
	auto &builder = spirv_module.get_builder();
	if (count == 1)
		return element_type;
	else
		return builder.makeVectorType(element_type, count);
}

spv::Id Converter::Impl::build_vector(spv::Id element_type, const spv::Id *elements, unsigned count)
{
	if (count == 1)
		return elements[0];

	auto &builder = spirv_module.get_builder();
	Operation *op = allocate(spv::OpCompositeConstruct, builder.makeVectorType(element_type, count));
	for (unsigned i = 0; i < count; i++)
		op->add_id(elements[i]);
	add(op);
	return op->id;
}

spv::Id Converter::Impl::build_constant_vector(spv::Id element_type, const spv::Id *elements, unsigned count)
{
	if (count == 1)
		return elements[0];
	auto &builder = spirv_module.get_builder();
	return builder.makeCompositeConstant(builder.makeVectorType(element_type, count),
	                                     { elements, elements + count });
}

spv::Id Converter::Impl::build_splat_constant_vector(spv::Id element_type, spv::Id value, unsigned count)
{
	spv::Id ids[4];
	for (unsigned i = 0; i < count; i++)
		ids[i] = value;
	return build_constant_vector(element_type, ids, count);
}

spv::Id Converter::Impl::build_offset(spv::Id value, unsigned offset)
{
	if (offset == 0)
		return value;

	auto &builder = spirv_module.get_builder();
	Operation *op = allocate(spv::OpIAdd, builder.makeUintType(32));
	op->add_ids({ value, builder.makeUintConstant(offset) });
	add(op);
	return op->id;
}

void Converter::Impl::repack_sparse_feedback(DXIL::ComponentType component_type, unsigned num_components,
                                             const llvm::Value *value, const llvm::Type *target_type,
                                             spv::Id override_value)
{
	auto *code_id = allocate(spv::OpCompositeExtract, builder().makeUintType(32));
	code_id->add_id(get_id_for_value(value));
	code_id->add_literal(0);
	add(code_id);

	auto effective_component_type = get_effective_typed_resource_type(component_type);

	spv::Id texel_id;
	if (override_value)
	{
		texel_id = override_value;
	}
	else
	{
		auto *texel = allocate(spv::OpCompositeExtract, get_type_id(effective_component_type, 1, num_components));
		texel->add_id(get_id_for_value(value));
		texel->add_literal(1);
		add(texel);
		texel_id = texel->id;
	}

	fixup_load_type_typed(component_type, num_components, texel_id, target_type);

	spv::Id components[5];
	if (num_components > 1)
	{
		for (unsigned i = 0; i < num_components; i++)
		{
			auto *extract_op = allocate(spv::OpCompositeExtract, get_type_id(component_type, 1, 1));
			extract_op->add_id(texel_id);
			extract_op->add_literal(i);
			add(extract_op);
			components[i] = extract_op->id;
		}
	}
	else
	{
		for (auto &comp : components)
			comp = texel_id;
		num_components = 4;
	}
	components[num_components] = code_id->id;

	auto *repack_op = allocate(spv::OpCompositeConstruct, get_type_id(value->getType()));
	for (auto &comp : components)
		repack_op->add_id(comp);
	add(repack_op);
	rewrite_value(value, repack_op->id);
}

bool Converter::Impl::support_native_fp16_operations() const
{
	return execution_mode_meta.native_16bit_operations || options.min_precision_prefer_native_16bit;
}

spv::Id Converter::Impl::build_value_cast(spv::Id value_id, DXIL::ComponentType input_type,
                                          DXIL::ComponentType output_type, unsigned components)
{
	// This path only hits for bitcasts or 16-bit <-> 32-bit casts.
	bool output_16bit = component_type_is_16bit(output_type);
	bool input_16bit = component_type_is_16bit(input_type);
	spv::Op opcode = spv::OpBitcast;

	if (output_16bit != input_16bit)
	{
		switch (input_type)
		{
		case DXIL::ComponentType::F16:
		case DXIL::ComponentType::F32:
			opcode = spv::OpFConvert;
			break;

		case DXIL::ComponentType::I16:
		case DXIL::ComponentType::I32:
			opcode = spv::OpSConvert;
			break;

		case DXIL::ComponentType::U16:
		case DXIL::ComponentType::U32:
			opcode = spv::OpUConvert;
			break;

		default:
			break;
		}

		// OpUConvert is not allowed on integer outputs.
		// We also need SConvert if we're doing 16 -> I32,
		// since what we actually want is I16 -> I32.
		switch (output_type)
		{
		case DXIL::ComponentType::I16:
		case DXIL::ComponentType::I32:
			opcode = spv::OpSConvert;
			break;

		default:
			break;
		}
	}

	Operation *op = allocate(opcode, get_type_id(output_type, 1, components));
	op->add_id(value_id);
	add(op);
	return op->id;
}

void Converter::Impl::fixup_load_type_io(DXIL::ComponentType component_type, unsigned components,
                                         const llvm::Value *value)
{
	auto output_component_type = component_type;
	auto input_component_type = component_type;

	bool promote_fp16 = input_component_type == DXIL::ComponentType::F16 && !support_native_fp16_operations();
	if (!options.storage_16bit_input_output || promote_fp16)
		input_component_type = convert_16bit_component_to_32bit(input_component_type);
	if (promote_fp16)
		output_component_type = convert_16bit_component_to_32bit(output_component_type);

	output_component_type = convert_component_to_unsigned(output_component_type);
	if (output_component_type != input_component_type)
	{
		rewrite_value(value, build_value_cast(get_id_for_value(value),
		                                      input_component_type, output_component_type, components));
	}
}

void Converter::Impl::fixup_load_type_atomic(DXIL::ComponentType component_type, unsigned components,
                                             const llvm::Value *value)
{
	auto output_component_type = component_type;
	auto input_component_type = component_type;

	output_component_type = convert_component_to_unsigned(output_component_type);
	if (output_component_type != input_component_type)
	{
		rewrite_value(value, build_value_cast(get_id_for_value(value),
		                                      input_component_type, output_component_type, components));
	}
}

void Converter::Impl::fixup_load_type_typed(DXIL::ComponentType &component_type, unsigned components,
                                            spv::Id &value_id, const llvm::Type *target_type)
{
	auto output_component_type = component_type;
	auto input_component_type = get_effective_typed_resource_type(component_type);

	if (output_component_type == DXIL::ComponentType::U64 && target_type->getIntegerBitWidth() == 32)
	{
		// If the component type is U64 it's used for atomics, but load/store interface is still 32-bit.
		// Bit-cast rather than value cast.
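		// Roughly the following is emitted here (SPIR-V pseudocode, for illustration only):
		//   %x  = OpCompositeExtract %ulong %value 0
		//   %v2 = OpBitcast %v2uint %x
		// and %v2 is then widened with zero constants or narrowed to the requested component count.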
		auto *bitcast_op = allocate(spv::OpCompositeExtract, builder().makeUintType(64));
		bitcast_op->add_id(value_id);
		bitcast_op->add_literal(0);
		add(bitcast_op);

		auto *u32_cast_op = allocate(spv::OpBitcast, builder().makeVectorType(builder().makeUintType(32), 2));
		u32_cast_op->add_id(bitcast_op->id);
		add(u32_cast_op);
		output_component_type = DXIL::ComponentType::U32;

		if (components > 2)
		{
			auto *composite_op = allocate(spv::OpCompositeConstruct,
			                              builder().makeVectorType(builder().makeUintType(32), components));
			composite_op->add_id(u32_cast_op->id);
			for (unsigned i = 2; i < components; i++)
				composite_op->add_id(builder().makeUintConstant(0));
			add(composite_op);
			value_id = composite_op->id;
		}
		else if (components == 1)
		{
			auto *extract_op = allocate(spv::OpCompositeExtract, builder().makeUintType(32));
			extract_op->add_id(u32_cast_op->id);
			extract_op->add_literal(0);
			add(extract_op);
			value_id = extract_op->id;
		}
		else
			value_id = u32_cast_op->id;
	}
	else
	{
		if (output_component_type == DXIL::ComponentType::F16 && !support_native_fp16_operations())
			output_component_type = convert_16bit_component_to_32bit(output_component_type);
		else if (target_type->getTypeID() == llvm::Type::TypeID::FloatTyID)
		{
			// Only convert if we actually want half here.
			// Certain operations always return float even if the resource type is half for some silly reason.
			output_component_type = DXIL::ComponentType::F32;
		}

		output_component_type = convert_component_to_unsigned(output_component_type);
		if (output_component_type != input_component_type)
			value_id = build_value_cast(value_id, input_component_type, output_component_type, components);
		component_type = output_component_type;
	}
}

void Converter::Impl::fixup_load_type_typed(DXIL::ComponentType component_type, unsigned components,
                                            const llvm::Value *value, const llvm::Type *target_type)
{
	spv::Id value_id = get_id_for_value(value);
	spv::Id new_value_id = value_id;
	fixup_load_type_typed(component_type, components, new_value_id, target_type);
	if (new_value_id != value_id)
		rewrite_value(value, new_value_id);
}

spv::Id Converter::Impl::fixup_store_type_io(DXIL::ComponentType component_type, unsigned components, spv::Id value)
{
	auto output_component_type = component_type;
	auto input_component_type = component_type;

	if (!options.storage_16bit_input_output ||
	    (output_component_type == DXIL::ComponentType::F16 && !support_native_fp16_operations()))
	{
		output_component_type = convert_16bit_component_to_32bit(output_component_type);
	}

	if (input_component_type == DXIL::ComponentType::F16 && !support_native_fp16_operations())
		input_component_type = convert_16bit_component_to_32bit(input_component_type);

	input_component_type = convert_component_to_unsigned(input_component_type);
	if (output_component_type != input_component_type)
		value = build_value_cast(value, input_component_type, output_component_type, components);
	return value;
}

spv::Id Converter::Impl::fixup_store_type_atomic(DXIL::ComponentType component_type, unsigned components,
                                                 spv::Id value)
{
	auto output_component_type = component_type;
	auto input_component_type = component_type;

	input_component_type = convert_component_to_unsigned(input_component_type);
	if (output_component_type != input_component_type)
		value = build_value_cast(value, input_component_type, output_component_type, components);
	return value;
}

spv::Id Converter::Impl::fixup_store_type_typed(DXIL::ComponentType component_type, unsigned components,
                                                spv::Id value)
{
	if (component_type == DXIL::ComponentType::U64)
	{
		// If the component type is U64 it's used for atomics, but load/store interface is still 32-bit.
		// Bit-cast rather than value cast.
		spv::Id u64_ids[4] = {};
		for (unsigned i = 0; i < components / 2; i++)
		{
			auto *shuffle_op = allocate(spv::OpVectorShuffle,
			                            builder().makeVectorType(builder().makeUintType(32), 2));
			shuffle_op->add_id(value);
			shuffle_op->add_id(value);
			shuffle_op->add_literal(2 * i + 0);
			shuffle_op->add_literal(2 * i + 1);
			add(shuffle_op);

			auto *cast_op = allocate(spv::OpBitcast, builder().makeUintType(64));
			cast_op->add_id(shuffle_op->id);
			add(cast_op);
			u64_ids[i] = cast_op->id;
		}

		for (unsigned i = components / 2; i < components; i++)
			u64_ids[i] = builder().makeUint64Constant(0);

		value = build_vector(builder().makeUintType(64), u64_ids, components);
	}
	else
	{
		auto output_component_type = get_effective_typed_resource_type(component_type);
		auto input_component_type = component_type;

		if (input_component_type == DXIL::ComponentType::F16 && !support_native_fp16_operations())
			input_component_type = convert_16bit_component_to_32bit(input_component_type);

		input_component_type = convert_component_to_unsigned(input_component_type);
		if (output_component_type != input_component_type)
			value = build_value_cast(value, input_component_type, output_component_type, components);
	}

	return value;
}

bool Converter::Impl::emit_phi_instruction(CFGNode *block, const llvm::PHINode &instruction)
{
	unsigned count = instruction.getNumIncomingValues();
	spv::Id override_type = 0;

	if (ags_filter_phi(*this, instruction, override_type))
		return true;

	if (count == 1)
	{
		// Degenerate PHI. Seems to happen in some bizarre cases with lcssa passes?
		auto *value = instruction.getIncomingValue(0);
		rewrite_value(&instruction, get_id_for_value(value));

		// This PHI node can actually be pointer or descriptor for whatever reason,
		// so inherit any such mappings.
		{
			auto itr = handle_to_storage_class.find(value);
			if (itr != handle_to_storage_class.end())
				handle_to_storage_class[&instruction] = itr->second;
		}

		{
			auto itr = handle_to_root_member_offset.find(value);
			if (itr != handle_to_root_member_offset.end())
				handle_to_root_member_offset[&instruction] = itr->second;
		}
	}
	else
	{
		PHI phi;
		phi.id = get_id_for_value(&instruction);

		auto itr = llvm_composite_meta.find(&instruction);
		if (itr != llvm_composite_meta.end() && itr->second.components <= 4 &&
		    (itr->second.access_mask & ~0xfu) == 0 &&
		    std::find(llvm_dxil_op_fake_struct_types.begin(), llvm_dxil_op_fake_struct_types.end(),
		              instruction.getType()) != llvm_dxil_op_fake_struct_types.end())
		{
			// Using PHI as a composite is exceedingly quirky, but it does come up.
			// FIXME: This could go wrong if one incoming value uses different components
			// from the others, but this scenario has only ever been observed from single-incoming
			// values, so this code path shouldn't really be taken at all.
			phi.type_id = get_type_id(instruction.getType()->getStructElementType(0));
			if (itr->second.components > 1)
				phi.type_id = builder().makeVectorType(phi.type_id, itr->second.components);
		}
		else
		{
			phi.type_id = override_type ? override_type : get_type_id(instruction.getType());
		}

		phi.relaxed = type_can_relax_precision(instruction.getType(), false);

		for (unsigned i = 0; i < count; i++)
		{
			IncomingValue incoming = {};
			auto bb_itr = bb_map.find(instruction.getIncomingBlock(i));
			// If the block was statically eliminated, it might not exist.
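			// E.g. a PHI with incoming edges { dead_bb, live_bb } quietly drops the dead edge
			// here; if only one edge remains, it is rewritten as a plain value further down.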
			if (bb_itr != bb_map.end())
			{
				incoming.block = bb_itr->second->node;
				auto *value = instruction.getIncomingValue(i);
				incoming.id = get_id_for_value(value);
				phi.incoming.push_back(incoming);
			}
		}

		if (phi.incoming.empty())
		{
			LOGE("PHI instruction has zero incoming blocks.\n");
			return false;
		}

		if (phi.incoming.size() > 1)
			block->ir.phi.push_back(std::move(phi));
		else
			rewrite_value(&instruction, phi.incoming.front().id);
	}

	return true;
}

static bool instruction_has_side_effects(const llvm::Instruction &instruction)
{
	if (llvm::isa<llvm::StoreInst>(&instruction) ||
	    llvm::isa<llvm::AtomicRMWInst>(&instruction) ||
	    llvm::isa<llvm::AtomicCmpXchgInst>(&instruction))
	{
		return true;
	}

	if (auto *call_inst = llvm::dyn_cast<llvm::CallInst>(&instruction))
	{
		auto *called_function = call_inst->getCalledFunction();
		if (strncmp(called_function->getName().data(), "dx.op", 5) == 0)
			return dxil_instruction_has_side_effects(call_inst);
		else
			return true;
	}

	return false;
}

bool Converter::Impl::emit_instruction(CFGNode *block, const llvm::Instruction &instruction)
{
	if (instruction.isTerminator())
		return true;

	// We really shouldn't have to do this, but DXC misses some dead SSA ops.
	// Helps sanitize repro suite output in some cases.
	if (options.eliminate_dead_code && !instruction_has_side_effects(instruction) &&
	    llvm_used_ssa_values.count(&instruction) == 0)
	{
		return true;
	}

	current_block = &block->ir.operations;

	if (auto *call_inst = llvm::dyn_cast<llvm::CallInst>(&instruction))
	{
		auto *called_function = call_inst->getCalledFunction();
		if (strncmp(called_function->getName().data(), "dx.op", 5) == 0)
		{
			return emit_dxil_instruction(*this, call_inst);
		}
		else if (strncmp(called_function->getName().data(), "llvm.", 5) == 0)
		{
			// lib_6_6 sometimes emits llvm.lifetime.begin/end for some bizarre reason.
			// Just ignore ...
			return true;
		}
		else
		{
			return emit_call_instruction(*this, *call_inst);
		}
	}
	else if (auto *phi_inst = llvm::dyn_cast<llvm::PHINode>(&instruction))
		return emit_phi_instruction(block, *phi_inst);
	else
		return emit_llvm_instruction(*this, instruction);

	current_block = nullptr;
	return false;
}

bool Converter::Impl::emit_execution_modes_node_output(llvm::MDNode *output)
{
	NodeOutputMeta output_meta = {};
	bool is_rw_sharing;
	output_meta.payload_stride = node_parse_payload_stride(output, is_rw_sharing);
	output_meta.spec_constant_node_index = builder().makeUintConstant(0, true);
	builder().addDecoration(output_meta.spec_constant_node_index, spv::DecorationSpecId,
	                        int(NodeSpecIdOutputBase + node_outputs.size()));

	uint32_t num_ops = output->getNumOperands();
	for (uint32_t i = 0; i < num_ops; i += 2)
	{
		auto tag = DXIL::NodeMetadataTag(get_constant_metadata(output, i));
		if (tag == DXIL::NodeMetadataTag::NodeOutputID)
		{
			auto *output_node = llvm::cast<llvm::MDNode>(output->getOperand(i + 1));
			String name = get_string_metadata(output_node, 0);
			builder().addName(output_meta.spec_constant_node_index, name.c_str());

			// FIXME: This is probably not accurate for arrayed nodes.
			// Can recursive nodes be arrayed? Seems very spicy ...
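			// I.e. an output is treated as recursive only when both the node ID string and the
			// array index match this node's own input, e.g. "Foo"[2] enqueueing "Foo"[2].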
			output_meta.is_recursive = name == node_input.node_id &&
			                           node_input.node_array_index == get_constant_metadata(output_node, 1);
		}
	}

	node_outputs.push_back(output_meta);
	return true;
}

NodeDispatchGrid Converter::Impl::node_parse_dispatch_grid(llvm::MDNode *node_meta)
{
	uint32_t num_ops = node_meta->getNumOperands();
	for (uint32_t i = 0; i < num_ops; i += 2)
	{
		auto tag = DXIL::NodeMetadataTag(get_constant_metadata(node_meta, i));
		if (tag == DXIL::NodeMetadataTag::NodeRecordType)
		{
			auto *node_record_type = llvm::cast<llvm::MDNode>(node_meta->getOperand(i + 1));
			for (uint32_t j = 0; j < node_record_type->getNumOperands(); j += 2)
			{
				if (get_constant_metadata(node_record_type, j) == 1)
				{
					auto *dispatch_info = llvm::cast<llvm::MDNode>(node_record_type->getOperand(j + 1));
					uint32_t byte_offset = get_constant_metadata(dispatch_info, 0);
					auto component_type = DXIL::ComponentType(get_constant_metadata(dispatch_info, 1));
					uint32_t num_components = get_constant_metadata(dispatch_info, 2);
					return { byte_offset, component_type, num_components };
				}
			}
		}
	}

	return {};
}

uint32_t Converter::Impl::node_parse_payload_stride(llvm::MDNode *node_meta, bool &is_rw_sharing)
{
	uint32_t num_ops = node_meta->getNumOperands();
	uint32_t payload_stride = 0;
	is_rw_sharing = false;

	for (uint32_t i = 0; i < num_ops; i += 2)
	{
		auto tag = DXIL::NodeMetadataTag(get_constant_metadata(node_meta, i));
		if (tag == DXIL::NodeMetadataTag::NodeIOFlags)
		{
			uint32_t node_io_flags = get_constant_metadata(node_meta, i + 1);
			if ((node_io_flags & DXIL::NodeIOEmptyRecordBit) != 0)
				return 0;
			if ((node_io_flags & DXIL::NodeIOTrackRWInputSharingBit) != 0)
				is_rw_sharing = true;
		}
		else if (tag == DXIL::NodeMetadataTag::NodeRecordType)
		{
			auto *node_record_type = llvm::cast<llvm::MDNode>(node_meta->getOperand(i + 1));
			for (uint32_t j = 0; j < node_record_type->getNumOperands(); j += 2)
			{
				if (get_constant_metadata(node_record_type, j) == 0)
				{
					uint32_t input_node_size = get_constant_metadata(node_record_type, j + 1);
					payload_stride = input_node_size;
				}
			}
		}
	}

	if (is_rw_sharing)
	{
		// DXIL metadata does not account for the implied u32 used for group sharing.
		// In case the last member is u16, align to u32.
		payload_stride = (payload_stride + 3u) & ~3u;
		// Allocate space for magic word.
		payload_stride += 4;
	}

	return payload_stride;
}

bool Converter::Impl::emit_execution_modes_node_input()
{
	spv::Id u32_type_id = builder().makeUintType(32);
	spv::Id uvec2_type_id = builder().makeVectorType(u32_type_id, 2);
	spv::Id u64_type_id = builder().makeUintType(64);

	if (node_input.payload_stride)
	{
		node_input.private_bda_var_id = create_variable(
		    spv::StorageClassPrivate, u64_type_id, "NodeInputPayloadBDA");
		node_input.private_stride_var_id = create_variable(
		    spv::StorageClassPrivate, u32_type_id, "NodeInputStride");
	}

	// We have to rewrite global IDs. Local invocation should remain intact.
	spv::Id uvec3_type = builder().makeVectorType(u32_type_id, 3);
	spv::Id workgroup_id = create_variable(spv::StorageClassPrivate, uvec3_type, "WorkgroupID");
	spv::Id global_invocation_id = create_variable(spv::StorageClassPrivate, uvec3_type, "GlobalInvocationID");
	spirv_module.register_builtin_shader_input(workgroup_id, spv::BuiltInWorkgroupId);
	spirv_module.register_builtin_shader_input(global_invocation_id, spv::BuiltInGlobalInvocationId);

	// Emit binding model.
	// Push constants are our only option.
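	// The NodeDispatchRegisters block declared below occupies the push constant slot itself,
	// so root parameters presumably have to be routed through the inline UBO path instead.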
	if (!options.inline_ubo_enable)
	{
		LOGE("When compiling for nodes, inline UBO path must be enabled for root parameters.\n");
		return false;
	}

	node_input.shader_record_block_type_id = emit_shader_record_buffer_block_type(true);
	spv::Id ptr_shader_record_block_type_id = 0;
	if (node_input.shader_record_block_type_id)
	{
		ptr_shader_record_block_type_id =
		    builder().makePointer(spv::StorageClassPhysicalStorageBuffer, node_input.shader_record_block_type_id);
	}
	else
	{
		// Dummy type
		ptr_shader_record_block_type_id = builder().makeVectorType(builder().makeUintType(32), 2);
	}

	// Declare the ABI for dispatching a node. This will change depending on the dispatch mode,
	// and style of execution (indirect pull or array).
	spv::Id u32_array_type_id = builder().makeRuntimeArray(u32_type_id);
	builder().addDecoration(u32_array_type_id, spv::DecorationArrayStride, 4);

	spv::Id u32_struct_type_id = builder().makeStructType({ u32_type_id }, "NodeReadonlyU32Ptr");
	builder().addDecoration(u32_struct_type_id, spv::DecorationBlock);
	builder().addMemberDecoration(u32_struct_type_id, 0, spv::DecorationOffset, 0);
	builder().addMemberDecoration(u32_struct_type_id, 0, spv::DecorationNonWritable);
	builder().addMemberName(u32_struct_type_id, 0, "value");
	spv::Id u32_ptr_type_id = builder().makePointer(spv::StorageClassPhysicalStorageBuffer, u32_struct_type_id);

	spv::Id u32_array_struct_type_id = builder().makeStructType({ u32_array_type_id }, "NodeReadonlyU32ArrayPtr");
	builder().addDecoration(u32_array_struct_type_id, spv::DecorationBlock);
	builder().addMemberDecoration(u32_array_struct_type_id, 0, spv::DecorationOffset, 0);
	builder().addMemberDecoration(u32_array_struct_type_id, 0, spv::DecorationNonWritable);
	builder().addMemberName(u32_array_struct_type_id, 0, "offsets");
	spv::Id u32_array_ptr_type_id =
	    builder().makePointer(spv::StorageClassPhysicalStorageBuffer, u32_array_struct_type_id);

	const Vector<spv::Id> members = {
		u64_type_id,
		u32_ptr_type_id,
		u32_ptr_type_id,
		uvec2_type_id,
		u64_type_id,
		u64_type_id,
		ptr_shader_record_block_type_id,
		u32_type_id,
		u32_type_id,
	};

	spv::Id type_id = builder().makeStructType(members, "NodeDispatchRegisters");
	builder().addMemberDecoration(type_id, NodePayloadBDA, spv::DecorationOffset, 0);
	builder().addMemberDecoration(type_id, NodeLinearOffsetBDA, spv::DecorationOffset, 8);
	builder().addMemberDecoration(type_id, NodeEndNodesBDA, spv::DecorationOffset, 16);
	builder().addMemberDecoration(type_id, NodePayloadStrideOrOffsetsBDA, spv::DecorationOffset, 24);
	builder().addMemberDecoration(type_id, NodePayloadOutputBDA, spv::DecorationOffset, 32);
	builder().addMemberDecoration(type_id, NodePayloadOutputAtomicBDA, spv::DecorationOffset, 40);
	builder().addMemberDecoration(type_id, NodeLocalRootSignatureBDA, spv::DecorationOffset, 48);
	builder().addMemberDecoration(type_id, NodePayloadOutputOffset, spv::DecorationOffset, 56);
	builder().addMemberDecoration(type_id, NodeRemainingRecursionLevels, spv::DecorationOffset, 60);

	// For linear node layout (entry point).
	// Node payload is found at PayloadLinearBDA + NodeIndex * PayloadStride.
	builder().addMemberName(type_id, NodePayloadBDA, "PayloadLinearBDA");
	// With packed workgroup layout, need to apply an offset.
	builder().addMemberName(type_id, NodeLinearOffsetBDA, "NodeLinearOffsetBDA");
	// For thread and coalesce, need to know total number of threads to mask execution on edge.
	builder().addMemberName(type_id, NodeEndNodesBDA, "NodeEndNodesBDA");
	builder().addMemberName(type_id, NodePayloadStrideOrOffsetsBDA, "NodePayloadStrideOrOffsetsBDA");
	builder().addMemberName(type_id, NodePayloadOutputBDA, "NodePayloadOutputBDA");
	builder().addMemberName(type_id, NodePayloadOutputAtomicBDA, "NodePayloadOutputAtomicBDA");
	builder().addMemberName(type_id, NodeLocalRootSignatureBDA, "NodeLocalRootSignatureBDA");
	// For broadcast nodes. Need to instance multiple times.
	// Becomes WorkGroupID and affects GlobalInvocationID.
	builder().addMemberName(type_id, NodePayloadOutputOffset, "NodePayloadOutputOffset");
	builder().addMemberName(type_id, NodeRemainingRecursionLevels, "NodeRemainingRecursionLevels");
	builder().addDecoration(type_id, spv::DecorationBlock);

	node_input.node_dispatch_push_id = create_variable(spv::StorageClassPushConstant, type_id, "NodeDispatch");
	builder().addDecoration(node_input.node_dispatch_push_id, spv::DecorationRestrictPointer);

	node_input.private_coalesce_offset_id = create_variable(spv::StorageClassPrivate, u32_type_id, "NodeCoalesceOffset");
	node_input.private_coalesce_count_id = create_variable(spv::StorageClassPrivate, u32_type_id, "NodeCoalesceCount");
	node_input.u32_ptr_type_id = u32_ptr_type_id;
	node_input.u32_array_ptr_type_id = u32_array_ptr_type_id;

	spv::Id u64_struct_type_id = builder().makeStructType({ u64_type_id }, "NodeReadonlyU64Ptr");
	builder().addDecoration(u64_struct_type_id, spv::DecorationBlock);
	builder().addMemberDecoration(u64_struct_type_id, 0, spv::DecorationOffset, 0);
	builder().addMemberDecoration(u64_struct_type_id, 0, spv::DecorationNonWritable);
	builder().addMemberName(u64_struct_type_id, 0, "value");
	node_input.u64_ptr_type_id = builder().makePointer(spv::StorageClassPhysicalStorageBuffer, u64_struct_type_id);

	return true;
}

NodeOutputData Converter::Impl::get_node_output(llvm::MDNode *output)
{
	NodeOutputData data = {};
	uint32_t num_ops = output->getNumOperands();

	for (uint32_t i = 0; i < num_ops; i += 2)
	{
		auto tag = DXIL::NodeMetadataTag(get_constant_metadata(output, i));
		if (tag == DXIL::NodeMetadataTag::NodeOutputID)
		{
			auto *output_node = llvm::cast<llvm::MDNode>(output->getOperand(i + 1));
			data.node_id = get_string_metadata(output_node, 0);
			data.node_array_index = get_constant_metadata(output_node, 1);
		}
		else if (tag == DXIL::NodeMetadataTag::NodeAllowSparseNodes)
			data.sparse_array = get_constant_metadata(output, i + 1) != 0;
		else if (tag == DXIL::NodeMetadataTag::NodeOutputArraySize)
			data.node_array_size = get_constant_metadata(output, i + 1);
		else if (tag == DXIL::NodeMetadataTag::NodeMaxRecords)
			data.max_records = get_constant_metadata(output, i + 1);
	}

	return data;
}

NodeInputData Converter::Impl::get_node_input(llvm::MDNode *meta)
{
	NodeInputData node = {};

	auto *launch_type_node = get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeLaunchType);
	if (!launch_type_node)
		return {};

	node.launch_type = DXIL::NodeLaunchType(
	    llvm::cast<llvm::ConstantAsMetadata>(*launch_type_node)->getValue()->getUniqueInteger().getZExtValue());
	if (node.launch_type == DXIL::NodeLaunchType::Invalid)
		return {};

	auto *is_program_entry_node = get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeIsProgramEntry);
	if (is_program_entry_node)
	{
		node.is_program_entry =
		    llvm::cast<llvm::ConstantAsMetadata>(*is_program_entry_node)->getValue()->getUniqueInteger().getZExtValue() != 0;
	}

	node.is_indirect_bda_stride_program_entry_spec_id = NodeSpecIdIndirectPayloadStride;
	node.is_entry_point_spec_id = NodeSpecIdIsEntryPoint;

	if (node.launch_type == DXIL::NodeLaunchType::Broadcasting)
	{
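		// For broadcasting launch, the dispatch grid may be specialized per pipeline;
		// the spec constant IDs assigned below let the runtime patch the upper-bound grid
		// without recompiling the shader.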
		node.dispatch_grid_is_upper_bound_spec_id = NodeSpecIdDispatchGridIsUpperBound;
		node.is_static_broadcast_node_spec_id = NodeSpecIdIsStaticBroadcastNode;
		node.max_broadcast_grid_spec_id[0] = NodeSpecIdMaxBroadcastGridX;
		node.max_broadcast_grid_spec_id[1] = NodeSpecIdMaxBroadcastGridY;
		node.max_broadcast_grid_spec_id[2] = NodeSpecIdMaxBroadcastGridZ;
	}
	else
	{
		node.dispatch_grid_is_upper_bound_spec_id = UINT32_MAX;
		node.is_static_broadcast_node_spec_id = UINT32_MAX;
		for (auto &spec_id : node.max_broadcast_grid_spec_id)
			spec_id = UINT32_MAX;
	}

	auto *recursion_node = get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeMaxRecursionDepth);
	if (recursion_node)
	{
		node.recursion_factor =
		    llvm::cast<llvm::ConstantAsMetadata>(*recursion_node)->getValue()->getUniqueInteger().getZExtValue();
	}

	if (node.launch_type == DXIL::NodeLaunchType::Broadcasting)
	{
		auto *max_grid = get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeMaxDispatchGrid);
		const llvm::MDOperand *fixed_grid;
		if (max_grid)
		{
			node.dispatch_grid_is_upper_bound = true;
			fixed_grid = max_grid;
		}
		else
			fixed_grid = get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeDispatchGrid);

		if (!fixed_grid)
			return {};

		for (uint32_t i = 0; i < 3; i++)
			node.broadcast_grid[i] = get_constant_metadata(llvm::cast<llvm::MDNode>(*fixed_grid), i);
	}

	node.thread_group_size_spec_id[0] = NodeSpecIdGroupSizeX;
	node.thread_group_size_spec_id[1] = NodeSpecIdGroupSizeY;
	node.thread_group_size_spec_id[2] = NodeSpecIdGroupSizeZ;

	auto *name_node = get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeID);
	if (name_node)
	{
		auto *name_id = llvm::cast<llvm::MDNode>(*name_node);
		node.node_id = get_string_metadata(name_id, 0);
		node.node_array_index = get_constant_metadata(name_id, 1);
	}

	auto *inputs_node = get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeInputs);
	llvm::MDNode *input = nullptr;
	if (inputs_node)
	{
		auto *inputs = llvm::cast<llvm::MDNode>(*inputs_node);
		// Current spec only allows one input node.
		if (inputs->getNumOperands() != 1)
			return {};
		input = llvm::cast<llvm::MDNode>(inputs->getOperand(0));
	}

	if (input)
	{
		uint32_t num_ops = input->getNumOperands();
		node.grid_buffer = node_parse_dispatch_grid(input);
		node.payload_stride = node_parse_payload_stride(input, node.node_track_rw_input_sharing);

		for (uint32_t i = 0; i < num_ops; i += 2)
		{
			auto tag = DXIL::NodeMetadataTag(get_constant_metadata(input, i));
			if (tag == DXIL::NodeMetadataTag::NodeMaxRecords)
				node.coalesce_factor = get_constant_metadata(input, i + 1);
		}

		// We seem to need a sensible default.
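		// E.g. a coalescing node whose input lacks a MaxRecords declaration would otherwise
		// end up with coalesce_factor == 0, so default to one record below.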
		if (node.coalesce_factor == 0 && node.launch_type == DXIL::NodeLaunchType::Coalescing)
			node.coalesce_factor = 1;
	}

	auto *share_input_node = get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeShareInputOf);
	if (share_input_node)
	{
		auto *share_input = llvm::cast<llvm::MDNode>(*share_input_node);
		node.node_share_input_id = get_string_metadata(share_input, 0);
		node.node_share_input_array_index = get_constant_metadata(share_input, 1);
	}

	auto *local_argument_node =
	    get_shader_property_tag(meta, DXIL::ShaderPropertyTag::NodeLocalRootArgumentsTableIndex);
	if (local_argument_node)
	{
		node.local_root_arguments_table_index =
		    llvm::cast<llvm::ConstantAsMetadata>(*local_argument_node)->getValue()->getUniqueInteger().getZExtValue();
	}
	else
		node.local_root_arguments_table_index = UINT32_MAX;

	return node;
}

NodeInputData Converter::get_node_input(const LLVMBCParser &parser, const char *entry)
{
	auto *entry_point_meta = get_entry_point_meta(parser.get_module(), entry);
	if (!entry_point_meta)
		return {};
	return Impl::get_node_input(entry_point_meta);
}

Vector<NodeOutputData> Converter::get_node_outputs(const LLVMBCParser &parser, const char *entry)
{
	Vector<NodeOutputData> output_data;
	auto *entry_point_meta = get_entry_point_meta(parser.get_module(), entry);
	if (!entry_point_meta)
		return {};

	auto *outputs_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::NodeOutputs);
	if (outputs_node)
	{
		auto *outputs = llvm::cast<llvm::MDNode>(*outputs_node);
		for (unsigned i = 0; i < outputs->getNumOperands(); i++)
		{
			auto *output = llvm::cast<llvm::MDNode>(outputs->getOperand(i));
			output_data.push_back(Impl::get_node_output(output));
		}
	}

	// Spec constant IDs are allocated incrementally.
	// Spec constant ID 0 is reserved for workgroup size spec constant.
	uint32_t spec_constant_id = NodeSpecIdOutputBase;
	for (auto &output : output_data)
	{
		output.node_index_spec_constant_id = spec_constant_id;
		spec_constant_id++;
	}

	return output_data;
}

String Converter::get_analysis_warnings() const
{
	String str;
	if (impl->shader_analysis.needs_auto_group_shared_barriers)
	{
		// This is a case that might just happen to work if the game assumes lock-step execution.
		// If the group size is larger, it's extremely unlikely the game works by chance on native drivers.
		// Some shaders seem to use groupshared as a sort of "scratch space" per thread, which
		// is a valid use case and does not require barriers to be correct.
		str += "- Has group shared access, but no group shared barrier anywhere.\n";
	}
	return str;
}

bool Converter::Impl::emit_execution_modes_node()
{
	// It will be necessary to override all this metadata through some API.
	// Not really needed to support this until we've implemented everything.
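	// The overall flow below: parse NodeInputData from DXIL metadata, emit the node input ABI
	// (push constants, payload pointers, spec constants), then one spec constant per declared
	// output, and finally fall through to the plain compute path for thread group setup.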
NodeInputData node = get_node_input(entry_point_meta); if (node.launch_type == DXIL::NodeLaunchType::Invalid) return false; node_input.node_id = node.node_id; node_input.node_array_index = node.node_array_index; node_input.launch_type = node.launch_type; node_input.dispatch_grid = node.grid_buffer; node_input.payload_stride = node.payload_stride; node_input.coalesce_stride = node.coalesce_factor; if (!emit_execution_modes_node_input()) return false; auto *outputs_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::NodeOutputs); if (outputs_node) { auto *outputs = llvm::cast(*outputs_node); for (unsigned i = 0; i < outputs->getNumOperands(); i++) { auto *output = llvm::cast(outputs->getOperand(i)); if (!emit_execution_modes_node_output(output)) return false; } } node_input.is_indirect_payload_stride_id = builder().makeBoolConstant(false, true); builder().addDecoration(node_input.is_indirect_payload_stride_id, spv::DecorationSpecId, int(node.is_indirect_bda_stride_program_entry_spec_id)); builder().addName(node_input.is_indirect_payload_stride_id, "NodeEntryIndirectPayloadStride"); node_input.is_entry_point_id = builder().makeBoolConstant(node.is_program_entry, true); builder().addDecoration(node_input.is_entry_point_id, spv::DecorationSpecId, int(node.is_entry_point_spec_id)); builder().addName(node_input.is_entry_point_id, "NodeIsProgramEntry"); if (node_input.launch_type == DXIL::NodeLaunchType::Broadcasting) { node_input.broadcast_has_max_grid_id = builder().makeBoolConstant(node.dispatch_grid_is_upper_bound, true); builder().addDecoration(node_input.broadcast_has_max_grid_id, spv::DecorationSpecId, int(node.dispatch_grid_is_upper_bound_spec_id)); builder().addName(node_input.broadcast_has_max_grid_id, "DispatchGridIsUpperBound"); node_input.is_static_broadcast_node_id = builder().makeBoolConstant(false, true); builder().addDecoration(node_input.is_static_broadcast_node_id, spv::DecorationSpecId, int(node.is_static_broadcast_node_spec_id)); builder().addName(node_input.is_static_broadcast_node_id, "DispatchStaticPayload"); spv::Id u32_type = builder().makeUintType(32); for (uint32_t i = 0; i < 3; i++) { node_input.max_broadcast_grid_id[i] = builder().makeUintConstant(node.broadcast_grid[i], true); builder().addDecoration(node_input.max_broadcast_grid_id[i], spv::DecorationSpecId, int(node.max_broadcast_grid_spec_id[i])); static const char *names[] = { "MaxBroadcastGridX", "MaxBroadcastGridY", "MaxBroadcastGridZ" }; builder().addName(node_input.max_broadcast_grid_id[i], names[i]); node_input.max_broadcast_grid_minus_1_id[i] = builder().createSpecConstantOp( spv::OpISub, u32_type, { node_input.max_broadcast_grid_id[i], builder().makeUintConstant(1) }, {}); static const char *sub_names[] = { "GridXMinus1", "GridYMinus1", "GridZMinus1" }; builder().addName(node_input.max_broadcast_grid_minus_1_id[i], sub_names[i]); } } return emit_execution_modes_compute(); } bool Converter::Impl::emit_execution_modes_compute() { auto *num_threads_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::NumThreads); if (num_threads_node) { auto *num_threads = llvm::cast(*num_threads_node); return emit_execution_modes_thread_wave_properties(num_threads); } else return false; } static bool entry_point_modifies_sample_mask(const llvm::MDNode *node) { if (!node->getOperand(2)) return false; auto &signature = node->getOperand(2); auto *signature_node = llvm::cast(signature); auto &outputs = signature_node->getOperand(1); if (!outputs) return false; auto *outputs_node = 
    for (unsigned i = 0; i < outputs_node->getNumOperands(); i++)
    {
        auto *output = llvm::cast<llvm::MDNode>(outputs_node->getOperand(i));
        auto system_value = static_cast<DXIL::Semantic>(get_constant_metadata(output, 3));
        if (system_value == DXIL::Semantic::Depth || system_value == DXIL::Semantic::DepthLessEqual ||
            system_value == DXIL::Semantic::DepthGreaterEqual || system_value == DXIL::Semantic::StencilRef ||
            system_value == DXIL::Semantic::Coverage)
        {
            return true;
        }
    }

    return false;
}

static uint64_t get_shader_flags(const llvm::MDNode *entry_point_meta)
{
    auto *flags_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::ShaderFlags);
    if (flags_node)
        return llvm::cast<llvm::ConstantAsMetadata>(*flags_node)->getValue()->getUniqueInteger().getZExtValue();
    else
        return 0;
}

bool Converter::Impl::emit_execution_modes_pixel_late()
{
    auto &builder = spirv_module.get_builder();

    if (execution_mode_meta.declares_rov)
    {
        builder.addExtension("SPV_EXT_fragment_shader_interlock");
        if (execution_mode_meta.per_sample_shading)
        {
            builder.addCapability(spv::CapabilityFragmentShaderSampleInterlockEXT);
            builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeSampleInterlockOrderedEXT);
        }
        else
        {
            builder.addCapability(spv::CapabilityFragmentShaderPixelInterlockEXT);
            builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModePixelInterlockOrderedEXT);
        }
    }

    return true;
}

bool Converter::Impl::emit_execution_modes_pixel()
{
    auto &builder = spirv_module.get_builder();
    auto flags = get_shader_flags(entry_point_meta);
    bool early_depth_stencil = (flags & DXIL::ShaderFlagEarlyDepthStencil) != 0;

    if (options.descriptor_qa_enabled || options.instruction_instrumentation.enabled)
    {
        // If we have descriptor QA enabled, we will have side effects when running fragment shaders.
        // This forces late-Z, which can trigger some horrible performance issues.
        // Make sure to enable early depth-stencil if nothing in the shader is early/late sensitive.
        if (!entry_point_modifies_sample_mask(entry_point_meta) &&
            !shader_analysis.has_side_effects && !shader_analysis.discards)
        {
            early_depth_stencil = true;
        }
    }

    if (early_depth_stencil)
        builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeEarlyFragmentTests);

    // Avoid masking helper lanes when strict_helper_lane_waveops is used.
    // Execution modes to enable correct Vulkan behaviour are set up later.
    auto *func = get_entry_point_function(entry_point_meta);
    execution_mode_meta.waveops_include_helper_lanes = func->hasFnAttribute("waveops-include-helper-lanes");

    // If helper lanes don't exist, don't bother trying to mask them out,
    // it will just confuse the compiler.
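    // Net effect of the expression below: helper lanes participate in wave ops unless this is
    // a fragment shader, strict_helper_lane_waveops is enabled, and the shader did not opt in
    // through the waveops-include-helper-lanes function attribute
    // (the SM 6.7 [WaveOpsIncludeHelperLanes] entry point attribute).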
    spirv_module.set_helper_lanes_participate_in_wave_ops(!options.strict_helper_lane_waveops ||
                                                          execution_model != spv::ExecutionModelFragment ||
                                                          execution_mode_meta.waveops_include_helper_lanes);
    return true;
}

bool Converter::Impl::emit_execution_modes_domain()
{
    auto &builder = spirv_module.get_builder();
    builder.addCapability(spv::CapabilityTessellation);
    auto *ds_state_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::DSState);
    if (ds_state_node)
    {
        auto *arguments = llvm::cast<llvm::MDNode>(*ds_state_node);
        auto domain = static_cast<DXIL::TessellatorDomain>(get_constant_metadata(arguments, 0));
        auto *func = spirv_module.get_entry_function();

        switch (domain)
        {
        case DXIL::TessellatorDomain::IsoLine:
            builder.addExecutionMode(func, spv::ExecutionModeIsolines);
            break;

        case DXIL::TessellatorDomain::Tri:
            builder.addExecutionMode(func, spv::ExecutionModeTriangles);
            break;

        case DXIL::TessellatorDomain::Quad:
            builder.addExecutionMode(func, spv::ExecutionModeQuads);
            break;

        default:
            LOGE("Unknown tessellator domain!\n");
            return false;
        }

        unsigned input_control_points = get_constant_metadata(arguments, 1);
        execution_mode_meta.stage_input_num_vertex = input_control_points;
        return true;
    }
    else
        return false;
}

bool Converter::Impl::emit_execution_modes_hull()
{
    auto &builder = spirv_module.get_builder();
    builder.addCapability(spv::CapabilityTessellation);
    auto *hs_state_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::HSState);
    if (hs_state_node)
    {
        auto *arguments = llvm::cast<llvm::MDNode>(*hs_state_node);

        auto *patch_constant = llvm::cast<llvm::ValueAsMetadata>(arguments->getOperand(0));
        auto *patch_constant_value = patch_constant->getValue();
        execution_mode_meta.patch_constant_function = llvm::cast<llvm::Function>(patch_constant_value);

        unsigned input_control_points = get_constant_metadata(arguments, 1);
        unsigned output_control_points = get_constant_metadata(arguments, 2);
        auto domain = static_cast<DXIL::TessellatorDomain>(get_constant_metadata(arguments, 3));
        auto partitioning = static_cast<DXIL::TessellatorPartitioning>(get_constant_metadata(arguments, 4));
        auto primitive = static_cast<DXIL::TessellatorOutputPrimitive>(get_constant_metadata(arguments, 5));
        auto *func = spirv_module.get_entry_function();

        switch (domain)
        {
        case DXIL::TessellatorDomain::IsoLine:
            builder.addExecutionMode(func, spv::ExecutionModeIsolines);
            break;

        case DXIL::TessellatorDomain::Tri:
            builder.addExecutionMode(func, spv::ExecutionModeTriangles);
            break;

        case DXIL::TessellatorDomain::Quad:
            builder.addExecutionMode(func, spv::ExecutionModeQuads);
            break;

        default:
            LOGE("Unknown tessellator domain!\n");
            return false;
        }

        switch (partitioning)
        {
        case DXIL::TessellatorPartitioning::Integer:
            builder.addExecutionMode(func, spv::ExecutionModeSpacingEqual);
            break;

        case DXIL::TessellatorPartitioning::Pow2:
            LOGE("Emulating Pow2 spacing as Integer.\n");
            builder.addExecutionMode(func, spv::ExecutionModeSpacingEqual);
            break;

        case DXIL::TessellatorPartitioning::FractionalEven:
            builder.addExecutionMode(func, spv::ExecutionModeSpacingFractionalEven);
            break;

        case DXIL::TessellatorPartitioning::FractionalOdd:
            builder.addExecutionMode(func, spv::ExecutionModeSpacingFractionalOdd);
            break;

        default:
            LOGE("Unknown tessellator partitioning.\n");
            return false;
        }

        switch (primitive)
        {
        case DXIL::TessellatorOutputPrimitive::TriangleCCW:
            builder.addExecutionMode(func, spv::ExecutionModeVertexOrderCcw);
            break;

        case DXIL::TessellatorOutputPrimitive::TriangleCW:
            builder.addExecutionMode(func, spv::ExecutionModeVertexOrderCw);
            break;

        case DXIL::TessellatorOutputPrimitive::Point:
            builder.addExecutionMode(func, spv::ExecutionModePointMode);
            // TODO: Do we have to specify CCW/CW in point mode?
            break;

        case DXIL::TessellatorOutputPrimitive::Line:
            break;

        default:
            LOGE("Unknown tessellator primitive.\n");
            return false;
        }

        builder.addExecutionMode(func, spv::ExecutionModeOutputVertices, output_control_points);
        execution_mode_meta.stage_input_num_vertex = input_control_points;
        execution_mode_meta.stage_output_num_vertex = output_control_points;
        return true;
    }
    else
        return false;
}

bool Converter::Impl::emit_execution_modes_geometry()
{
    auto &builder = spirv_module.get_builder();
    builder.addCapability(spv::CapabilityGeometry);
    auto *gs_state_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::GSState);
    if (gs_state_node)
    {
        auto *arguments = llvm::cast<llvm::MDNode>(*gs_state_node);

        auto input_primitive = static_cast<DXIL::InputPrimitive>(get_constant_metadata(arguments, 0));
        unsigned max_vertex_count = get_constant_metadata(arguments, 1);
        auto *func = spirv_module.get_entry_function();
        auto topology = static_cast<DXIL::PrimitiveTopology>(get_constant_metadata(arguments, 3));
        unsigned gs_instances = get_constant_metadata(arguments, 4);
        execution_mode_meta.gs_stream_active_mask = get_constant_metadata(arguments, 2);

        builder.addExecutionMode(func, spv::ExecutionModeInvocations, gs_instances);
        builder.addExecutionMode(func, spv::ExecutionModeOutputVertices, max_vertex_count);

        switch (input_primitive)
        {
        case DXIL::InputPrimitive::Point:
            builder.addExecutionMode(func, spv::ExecutionModeInputPoints);
            execution_mode_meta.stage_input_num_vertex = 1;
            break;

        case DXIL::InputPrimitive::Line:
            builder.addExecutionMode(func, spv::ExecutionModeInputLines);
            execution_mode_meta.stage_input_num_vertex = 2;
            break;

        case DXIL::InputPrimitive::LineWithAdjacency:
            builder.addExecutionMode(func, spv::ExecutionModeInputLinesAdjacency);
            execution_mode_meta.stage_input_num_vertex = 4;
            break;

        case DXIL::InputPrimitive::Triangle:
            builder.addExecutionMode(func, spv::ExecutionModeTriangles);
            execution_mode_meta.stage_input_num_vertex = 3;
            break;

        case DXIL::InputPrimitive::TriangleWithAdjaceny:
            builder.addExecutionMode(func, spv::ExecutionModeInputTrianglesAdjacency);
            execution_mode_meta.stage_input_num_vertex = 6;
            break;

        default:
            LOGE("Unexpected input primitive (%u).\n", unsigned(input_primitive));
            return false;
        }

        switch (topology)
        {
        case DXIL::PrimitiveTopology::PointList:
            builder.addExecutionMode(func, spv::ExecutionModeOutputPoints);
            break;

        case DXIL::PrimitiveTopology::LineStrip:
            builder.addExecutionMode(func, spv::ExecutionModeOutputLineStrip);
            break;

        case DXIL::PrimitiveTopology::TriangleStrip:
            builder.addExecutionMode(func, spv::ExecutionModeOutputTriangleStrip);
            break;

        default:
            LOGE("Unexpected output primitive topology (%u).\n", unsigned(topology));
            return false;
        }

        return true;
    }
    else
        return false;
}

bool Converter::Impl::emit_execution_modes_ray_tracing(spv::ExecutionModel model)
{
    auto &builder = spirv_module.get_builder();
    builder.addCapability(spv::CapabilityRayTracingKHR);

    if (options.ray_tracing_primitive_culling_enabled && shader_analysis.can_require_primitive_culling)
        builder.addCapability(spv::CapabilityRayTraversalPrimitiveCullingKHR);

    if (options.opacity_micromap_enabled && shader_analysis.can_require_opacity_micromap)
    {
        builder.addCapability(spv::CapabilityRayTracingOpacityMicromapEXT);
        builder.addExtension("SPV_EXT_opacity_micromap");
    }

    builder.addExtension("SPV_KHR_ray_tracing");
    builder.addExtension("SPV_EXT_descriptor_indexing");

    // For DXR, we'll need full bindless.
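    // Descriptive note: the capability block below corresponds to the Vulkan descriptor indexing
    // feature bits (runtimeDescriptorArray plus dynamic and non-uniform indexing of sampled images,
    // storage images, and storage/uniform buffers), which a D3D12-on-Vulkan runtime has to require
    // anyway before it can expose DXR.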
    builder.addCapability(spv::CapabilityRuntimeDescriptorArrayEXT);
    builder.addCapability(spv::CapabilitySampledImageArrayDynamicIndexing);
    builder.addCapability(spv::CapabilitySampledImageArrayNonUniformIndexing);
    builder.addCapability(spv::CapabilityStorageImageArrayDynamicIndexing);
    builder.addCapability(spv::CapabilityStorageImageArrayNonUniformIndexing);
    builder.addCapability(spv::CapabilityStorageBufferArrayDynamicIndexing);
    builder.addCapability(spv::CapabilityStorageBufferArrayNonUniformIndexing);
    builder.addCapability(spv::CapabilityUniformBufferArrayDynamicIndexing);
    builder.addCapability(spv::CapabilityUniformBufferArrayNonUniformIndexing);
    return true;
}

bool Converter::Impl::emit_execution_modes_thread_wave_properties(const llvm::MDNode *num_threads)
{
    auto &builder = spirv_module.get_builder();

    if (options.force_wave_size_enable && options.force_subgroup_size)
    {
        execution_mode_meta.wave_size_min = options.force_subgroup_size;
        execution_mode_meta.wave_size_max = 0;
        execution_mode_meta.wave_size_preferred = 0;
    }
    else
    {
        auto *wave_size_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::WaveSize);
        auto *wave_size_range_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::RangedWaveSize);

        if (wave_size_range_node)
        {
            auto *wave_size = llvm::cast<llvm::MDNode>(*wave_size_range_node);
            execution_mode_meta.wave_size_min = get_constant_metadata(wave_size, 0);
            execution_mode_meta.wave_size_max = get_constant_metadata(wave_size, 1);
            execution_mode_meta.wave_size_preferred = get_constant_metadata(wave_size, 2);
        }
        else if (wave_size_node)
        {
            auto *wave_size = llvm::cast<llvm::MDNode>(*wave_size_node);
            execution_mode_meta.wave_size_min = get_constant_metadata(wave_size, 0);
            execution_mode_meta.wave_size_max = 0;
            execution_mode_meta.wave_size_preferred = 0;
        }
    }

    unsigned threads[3];
    for (unsigned dim = 0; dim < 3; dim++)
        threads[dim] = get_constant_metadata(num_threads, dim);
    unsigned total_workgroup_threads = threads[0] * threads[1] * threads[2];

    if (execution_model == spv::ExecutionModelGLCompute)
    {
        if ((total_workgroup_threads <= 32 && shader_analysis.require_subgroups) ||
            (shader_analysis.subgroup_ballot_reads_first && !shader_analysis.subgroup_ballot_reads_upper))
        {
            // Common game bug. Only reading the first scalar of a ballot probably means
            // the shader relies on WaveSize <= 32.
            suggest_maximum_wave_size(32);
        }
    }

    if (shader_analysis.require_compute_shader_derivatives)
    {
        if (execution_model != spv::ExecutionModelGLCompute && execution_model != spv::ExecutionModelTaskEXT &&
            execution_model != spv::ExecutionModelMeshEXT)
        {
            LOGE("Derivatives only supported in compute, task and mesh shaders.\n");
            return false;
        }

        // For sanity, verify that the dimensions align sufficiently.
        // The spec says the product of the workgroup sizes must be a multiple of 4.
        if (total_workgroup_threads % 4 == 0)
        {
            bool derivatives_2d = (threads[0] % 2 == 0) && (threads[1] % 2 == 0);

            if (options.compute_shader_derivatives)
            {
                builder.addExtension(options.compute_shader_derivatives_khr ?
                                     "SPV_KHR_compute_shader_derivatives" :
                                     "SPV_NV_compute_shader_derivatives");

                if (derivatives_2d && options.compute_shader_derivatives_quad)
                {
                    builder.addCapability(spv::CapabilityComputeDerivativeGroupQuadsKHR);
                    // It is technically not in spec to just assume this, since subgroup lane mapping
                    // to local invocation index is not defined without this.
                    // In practice on NV, this holds based on our testing.
                    builder.addExecutionMode(spirv_module.get_entry_function(),
                                             spv::ExecutionModeDerivativeGroupQuadsKHR);
                }
                else
                {
                    builder.addCapability(spv::CapabilityComputeDerivativeGroupLinearKHR);
                    // It is technically not in spec to just assume this, since subgroup lane mapping
                    // to local invocation index is not defined without this.
                    // In practice on NV, this holds based on our testing.
                    builder.addExecutionMode(spirv_module.get_entry_function(),
                                             spv::ExecutionModeDerivativeGroupLinearKHR);
                }
            }

            // If the X and Y dimensions align with 2,
            // we need to assume that any quad op works on a 2D dispatch.
            execution_mode_meta.synthesize_2d_quad_dispatch =
                !options.compute_shader_derivatives_quad && derivatives_2d;

            if (execution_mode_meta.synthesize_2d_quad_dispatch)
            {
                threads[0] *= 2;
                threads[1] /= 2;
            }
        }
        else
        {
            // DXC is robust against this case.
            // Derivatives become meaningless now, so we have to fake the results.
            execution_mode_meta.synthesize_dummy_derivatives = true;
            LOGW("Invalid use of compute shader derivatives detected. Falling back to robust results.\n");
        }
    }

    for (unsigned dim = 0; dim < 3; dim++)
        execution_mode_meta.workgroup_threads[dim] = threads[dim];

    if (execution_model_lib_target)
    {
        threads[0] = builder.makeUintConstant(threads[0], true);
        threads[1] = builder.makeUintConstant(threads[1], true);
        threads[2] = builder.makeUintConstant(threads[2], true);
        builder.addDecoration(threads[0], spv::DecorationSpecId, NodeSpecIdGroupSizeX);
        builder.addDecoration(threads[1], spv::DecorationSpecId, NodeSpecIdGroupSizeY);
        builder.addDecoration(threads[2], spv::DecorationSpecId, NodeSpecIdGroupSizeZ);
        builder.addExecutionModeId(spirv_module.get_entry_function(), spv::ExecutionModeLocalSizeId,
                                   threads[0], threads[1], threads[2]);

        node_input.thread_group_size_id =
            builder.makeCompositeConstant(builder.makeVectorType(builder.makeUintType(32), 3),
                                          { threads[0], threads[1], threads[2] }, true);
        builder.addName(node_input.thread_group_size_id, "ThreadGroupSize");
    }
    else
    {
        builder.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeLocalSize,
                                 threads[0], threads[1], threads[2]);
    }

    return true;
}

bool Converter::Impl::emit_execution_modes_amplification()
{
    auto &builder = spirv_module.get_builder();
    builder.addExtension("SPV_EXT_mesh_shader");
    builder.addCapability(spv::CapabilityMeshShadingEXT);

    auto *as_state_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::ASState);
    if (as_state_node)
    {
        auto *arguments = llvm::cast<llvm::MDNode>(*as_state_node);
        auto *num_threads = llvm::cast<llvm::MDNode>(arguments->getOperand(0));
        return emit_execution_modes_thread_wave_properties(num_threads);
    }
    else
        return false;
}

bool Converter::Impl::emit_execution_modes_mesh()
{
    auto &builder = spirv_module.get_builder();
    auto *func = spirv_module.get_entry_function();
    builder.addExtension("SPV_EXT_mesh_shader");
    builder.addCapability(spv::CapabilityMeshShadingEXT);

    auto *ms_state_node = get_shader_property_tag(entry_point_meta, DXIL::ShaderPropertyTag::MSState);
    if (ms_state_node)
    {
        auto *arguments = llvm::cast<llvm::MDNode>(*ms_state_node);
        unsigned max_vertex_count = get_constant_metadata(arguments, 1);
        unsigned max_primitive_count = get_constant_metadata(arguments, 2);
        auto topology = static_cast<DXIL::MeshOutputTopology>(get_constant_metadata(arguments, 3));
        unsigned index_count;

        builder.addExecutionMode(func, spv::ExecutionModeOutputVertices, std::max<unsigned>(1, max_vertex_count));
        builder.addExecutionMode(func, spv::ExecutionModeOutputPrimitivesEXT,
                                 std::max<unsigned>(1, max_primitive_count));

        switch (topology)
        {
        case DXIL::MeshOutputTopology::Undefined:
            index_count = 0;
            break;
        case DXIL::MeshOutputTopology::Line:
            builder.addExecutionMode(func, spv::ExecutionModeOutputLinesEXT);
            index_count = 2;
            break;

        case DXIL::MeshOutputTopology::Triangle:
            builder.addExecutionMode(func, spv::ExecutionModeOutputTrianglesEXT);
            index_count = 3;
            break;

        default:
            LOGE("Unexpected mesh output topology (%u).\n", unsigned(topology));
            return false;
        }

        execution_mode_meta.stage_output_num_vertex = max_vertex_count;
        execution_mode_meta.stage_output_num_primitive = max_primitive_count;
        execution_mode_meta.primitive_index_dimension = index_count;

        auto *num_threads = llvm::cast<llvm::MDNode>(arguments->getOperand(0));
        return emit_execution_modes_thread_wave_properties(num_threads);
    }
    else
        return false;
}

bool Converter::Impl::emit_execution_modes_fp_denorm_rounding()
{
    // Check for SM 6.2 denorm handling. Only applies to FP32.
    auto *func = get_entry_point_function(entry_point_meta);
    if (!func)
        return true;

    // NVIDIA hack. The way the driver exposes float controls is very unfortunate.
    // If only partial denorm support is exposed, assume we cannot freely control FP32 behavior either.
    // However, for SM 6.2+, we have to force it on NVIDIA, even if the driver doesn't actually expose it.
    bool supports_full_denorm_control_fp32 =
        options.supports_float16_denorm_preserve && options.supports_float64_denorm_preserve;

    // Plain DXIL only supports fp32-denorm-mode, the rest are internal extensions.
    const struct
    {
        const char *tag;
        int bits;
        bool supported;
    } denorms[] = {
        { "dxbc-fp16-denorm-mode", 16, options.supports_float16_denorm_preserve },
        { "dxbc-fp32-denorm-mode", 32, supports_full_denorm_control_fp32 },
        { "dxbc-fp64-denorm-mode", 64, options.supports_float64_denorm_preserve },
        { "fp32-denorm-mode", 32, true },
    };

    // For whatever reason, NVIDIA loses a tremendous amount of performance from setting rounding modes.
    // Just ignore it since it's always RTE in practice anyway.
#if 0
    static const struct
    {
        const char *tag;
        int bits;
    } rounding[] = {
        { "dxbc-fp16-round-mode", 16 },
        { "dxbc-fp32-round-mode", 32 },
        { "dxbc-fp64-round-mode", 64 },
    };
#endif

    for (auto &d : denorms)
    {
        if (!d.supported)
            continue;

        auto attr = func->getFnAttribute(d.tag);
        auto str = attr.getValueAsString();
        if (str == "ftz")
        {
            builder().addExtension("SPV_KHR_float_controls");
            builder().addCapability(spv::CapabilityDenormFlushToZero);
            builder().addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDenormFlushToZero, d.bits);
        }
        else if (str == "preserve")
        {
            builder().addExtension("SPV_KHR_float_controls");
            builder().addCapability(spv::CapabilityDenormPreserve);
            builder().addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDenormPreserve, d.bits);
        }
    }

#if 0
    for (auto &r : rounding)
    {
        auto attr = func->getFnAttribute(r.tag);
        auto str = attr.getValueAsString();
        if (str == "rtz")
        {
            builder().addExtension("SPV_KHR_float_controls");
            builder().addCapability(spv::CapabilityRoundingModeRTZ);
            builder().addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeRoundingModeRTZ, r.bits);
        }
        else if (str == "rte")
        {
            builder().addExtension("SPV_KHR_float_controls");
            builder().addCapability(spv::CapabilityRoundingModeRTE);
            builder().addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeRoundingModeRTE, r.bits);
        }
    }
#endif

    if (shader_analysis.require_wmma && GlobalConfiguration::get().wmma_rdna3_workaround)
    {
        // FP16 RTZ allows faster conversions on AMD.
        // This hack only makes sense on RDNA3.
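        // The three calls below boil down to roughly this SPIR-V (illustrative):
        //   OpCapability RoundingModeRTZ
        //   OpExecutionMode %main RoundingModeRTZ 16
        // plus the SPV_KHR_float_controls extension declaration.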
        builder().addExtension("SPV_KHR_float_controls");
        builder().addCapability(spv::CapabilityRoundingModeRTZ);
        builder().addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeRoundingModeRTZ, 16);
    }

    return true;
}

bool Converter::Impl::analyze_execution_modes_meta()
{
    auto *meta = entry_point_meta;
    if (execution_model_lib_target)
        if (auto *null_meta = get_null_entry_point_meta(bitcode_parser.get_module()))
            meta = null_meta;

    auto flags = get_shader_flags(meta);
    execution_mode_meta.native_16bit_operations = (flags & DXIL::ShaderFlagNativeLowPrecision) != 0;
    return true;
}

void Converter::Impl::emit_execution_modes_post_code_generation()
{
    auto &b = builder();

    if (module_is_dxilconv(bitcode_parser.get_module()))
    {
        // We should use these globally, but we don't want to invalidate all Fossilize archives just yet.
        // Shader instrumentation may declare its own preservation modes, so only declare execution modes
        // if we haven't done anything.
        if (!b.hasCapability(spv::CapabilitySignedZeroInfNanPreserve))
        {
            b.addExtension("SPV_KHR_float_controls");
            b.addCapability(spv::CapabilitySignedZeroInfNanPreserve);
            b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeSignedZeroInfNanPreserve, 32);
            if (b.hasCapability(spv::CapabilityFloat64))
                b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeSignedZeroInfNanPreserve, 64);
        }

        // DXBC assumes flush-to-zero, but dxilconv doesn't explicitly emit that, since it's not in SM 6.0.
        if (!b.hasCapability(spv::CapabilityDenormFlushToZero) && !b.hasCapability(spv::CapabilityDenormPreserve))
        {
            b.addExtension("SPV_KHR_float_controls");
            b.addCapability(spv::CapabilityDenormFlushToZero);
            b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDenormFlushToZero, 32);
        }
    }

    // Custom IR is expected to set this with extended attributes.
    if (!module_is_dxbc_spirv(bitcode_parser.get_module()))
    {
        // Float16 and Float64 require denorms to be preserved in D3D12.
        if (b.hasCapability(spv::CapabilityFloat16) && options.supports_float16_denorm_preserve)
        {
            b.addExtension("SPV_KHR_float_controls");
            b.addCapability(spv::CapabilityDenormPreserve);
            b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDenormPreserve, 16);
        }

        if (b.hasCapability(spv::CapabilityFloat64) && options.supports_float64_denorm_preserve)
        {
            b.addExtension("SPV_KHR_float_controls");
            b.addCapability(spv::CapabilityDenormPreserve);
            b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeDenormPreserve, 64);
        }
    }
    else
    {
        // If instrumentation didn't add these already.
        if (!b.hasCapability(spv::CapabilitySignedZeroInfNanPreserve))
        {
            // Set SignedZeroInfNanPreserve by default for new IR.
            // We should use these globally, but we don't want to invalidate all Fossilize archives just yet.
            b.addExtension("SPV_KHR_float_controls");
            b.addCapability(spv::CapabilitySignedZeroInfNanPreserve);
            b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeSignedZeroInfNanPreserve, 32);
            if (b.hasCapability(spv::CapabilityFloat16))
                b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeSignedZeroInfNanPreserve, 16);
            if (b.hasCapability(spv::CapabilityFloat64))
                b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeSignedZeroInfNanPreserve, 64);
        }
    }

    // Opt into quad derivatives and maximal reconvergence for fragment shaders using
    // QuadAll/QuadAny intrinsics to get meaningful behaviour for quad-uniform control
    // flow; other quad ops are ignored for now.
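    // For example (illustrative HLSL, not from any test here):
    //   if (QuadAny(needs_sample))
    //       color = tex.Sample(samp, uv);
    // QuadDerivativesKHR gives this pattern defined derivatives under quad-uniform control flow,
    // and RequireFullQuadsKHR guarantees fully populated quads so QuadAny/QuadAll see all four lanes.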
    if (options.supports_quad_control && execution_model == spv::ExecutionModelFragment &&
        execution_mode_meta.needs_quad_derivatives)
    {
        b.addExtension("SPV_KHR_quad_control");
        b.addCapability(spv::CapabilityQuadControlKHR);
        b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeRequireFullQuadsKHR);
        b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeQuadDerivativesKHR);
    }

    if (options.supports_maximal_reconvergence &&
        (options.force_maximal_reconvergence || execution_mode_meta.waveops_include_helper_lanes ||
         execution_mode_meta.needs_quad_derivatives || shader_analysis.need_maximal_reconvergence_helper_call))
    {
        b.addExtension("SPV_KHR_maximal_reconvergence");
        b.addExecutionMode(spirv_module.get_entry_function(), spv::ExecutionModeMaximallyReconvergesKHR);
    }
}

bool Converter::Impl::emit_execution_modes_late()
{
    switch (execution_model)
    {
    case spv::ExecutionModelFragment:
        if (!emit_execution_modes_pixel_late())
            return false;
        break;

    default:
        break;
    }

    return true;
}

bool Converter::Impl::emit_execution_modes()
{
    switch (execution_model)
    {
    case spv::ExecutionModelGLCompute:
        if (execution_model_lib_target)
        {
            if (!emit_execution_modes_node())
                return false;
        }
        else
        {
            if (!emit_execution_modes_compute())
                return false;
        }
        break;

    case spv::ExecutionModelGeometry:
        if (!emit_execution_modes_geometry())
            return false;
        break;

    case spv::ExecutionModelTessellationControl:
        if (!emit_execution_modes_hull())
            return false;
        break;

    case spv::ExecutionModelTessellationEvaluation:
        if (!emit_execution_modes_domain())
            return false;
        break;

    case spv::ExecutionModelFragment:
        if (!emit_execution_modes_pixel())
            return false;
        break;

    case spv::ExecutionModelRayGenerationKHR:
    case spv::ExecutionModelMissKHR:
    case spv::ExecutionModelIntersectionKHR:
    case spv::ExecutionModelAnyHitKHR:
    case spv::ExecutionModelCallableKHR:
    case spv::ExecutionModelClosestHitKHR:
        if (!emit_execution_modes_ray_tracing(execution_model))
            return false;
        break;

    case spv::ExecutionModelTaskEXT:
        if (!emit_execution_modes_amplification())
            return false;
        break;

    case spv::ExecutionModelMeshEXT:
        if (!emit_execution_modes_mesh())
            return false;
        break;

    default:
        break;
    }

    if (!emit_execution_modes_fp_denorm_rounding())
        return false;

    return true;
}

ConvertedFunction::Function Converter::Impl::build_rov_main(const Vector<llvm::BasicBlock *> &visit_order,
                                                            CFGNodePool &pool,
                                                            Vector<ConvertedFunction::Function> &leaves)
{
    auto *code_main = convert_function(visit_order, true);

    // Need to figure out if our ROV use is trivial. If not, we will wrap the entire function in ROV pairs.
    CFGStructurizer cfg{code_main, pool, spirv_module};
    bool trivial_rewrite = cfg.rewrite_rov_lock_region();
    if (trivial_rewrite)
        return { code_main, spirv_module.get_entry_function() };

    // If we need to fall back, we need a wrapper function. Replace the entry point.
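    // The rewritten entry point then looks roughly like this (illustrative):
    //   void main()
    //   {
    //       OpBeginInvocationInterlockEXT
    //       code_main();
    //       OpEndInvocationInterlockEXT
    //   }
    // so the entire original shader body executes inside one interlock region.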
    spv::Block *code_entry;
    auto *code_func = builder().makeFunctionEntry(spv::NoPrecision, builder().makeVoidType(),
                                                  "code_main", {}, {}, &code_entry);
    code_func->moveLocalDeclarationsFrom(spirv_module.get_entry_function());

    auto *entry = pool.create_node();
    entry->ir.operations.push_back(allocate(spv::OpBeginInvocationInterlockEXT));
    auto *call_op = allocate(spv::OpFunctionCall, builder().makeVoidType());
    call_op->add_id(code_func->getId());
    entry->ir.operations.push_back(call_op);
    entry->ir.operations.push_back(allocate(spv::OpEndInvocationInterlockEXT));
    entry->ir.terminator.type = Terminator::Type::Return;

    leaves.push_back({ code_main, code_func });
    return { entry, spirv_module.get_entry_function() };
}

ConvertedFunction::Function Converter::Impl::build_node_main(const Vector<llvm::BasicBlock *> &visit_order,
                                                             CFGNodePool &pool,
                                                             Vector<ConvertedFunction::Function> &leaves)
{
    spv::Block *node_entry;
    auto *node_func = builder().makeFunctionEntry(spv::NoPrecision, builder().makeVoidType(),
                                                  "node_main", {}, {}, &node_entry);

    // Set build point so alloca() functions can create variables correctly.
    builder().setBuildPoint(node_entry);
    auto *node_main = convert_function(visit_order, true);
    leaves.push_back({ node_main, node_func });

    auto *entry = pool.create_node();
    current_block = &entry->ir.operations;
    entry->ir.terminator.type = Terminator::Type::Return;
    if (!emit_workgraph_dispatcher(*this, pool, entry, node_func->getId()))
        return {};

    return { entry, spirv_module.get_entry_function() };
}

void Converter::Impl::emit_patch_output_lowering(CFGNode *bb)
{
    auto *node = entry_point_meta;
    current_block = &bb->ir.operations;

    assert(node->getOperand(2));
    auto &signature = node->getOperand(2);
    auto *signature_node = llvm::cast<llvm::MDNode>(signature);
    auto &patch_variables = signature_node->getOperand(2);
    if (!patch_variables)
        return;

    auto *patch_node = llvm::dyn_cast<llvm::MDNode>(patch_variables);
    spv::Id u32_type = builder().makeUintType(32);
    spv::Id uvec4_type = builder().makeVectorType(u32_type, 4);

    for (unsigned i = 0; i < patch_node->getNumOperands(); i++)
    {
        auto *patch = llvm::cast<llvm::MDNode>(patch_node->getOperand(i));
        auto element_id = get_constant_metadata(patch, 0);
        auto actual_element_type =
            normalize_component_type(static_cast<DXIL::ComponentType>(get_constant_metadata(patch, 2)));
        auto system_value = static_cast<DXIL::Semantic>(get_constant_metadata(patch, 3));
        if (system_value != DXIL::Semantic::User)
            continue;

        auto rows = get_constant_metadata(patch, 6);
        auto cols = get_constant_metadata(patch, 7);
        auto start_row = get_constant_metadata(patch, 8);
        auto start_col = get_constant_metadata(patch, 9);

        auto &meta = patch_elements_meta[element_id];
        assert(meta.id);

        for (unsigned row = 0; row < rows; row++)
        {
            auto *chain = allocate(spv::OpAccessChain, builder().makePointer(spv::StorageClassPrivate, uvec4_type));
            chain->add_id(execution_mode_meta.patch_lowering_array_var_id);
            chain->add_id(builder().makeUintConstant(row + start_row));
            add(chain);

            auto *load_op = allocate(spv::OpLoad, uvec4_type);
            load_op->add_id(chain->id);
            add(load_op);

            spv::Id store_id;
            if (cols == 4)
            {
                store_id = load_op->id;
            }
            else if (cols > 1)
            {
                auto *shuffle_op = allocate(spv::OpVectorShuffle, get_type_id(DXIL::ComponentType::U32, 1, cols));
                shuffle_op->add_id(load_op->id);
                shuffle_op->add_id(load_op->id);
                for (unsigned c = 0; c < cols; c++)
                    shuffle_op->add_literal(c + start_col);
                add(shuffle_op);
                store_id = shuffle_op->id;
            }
            else
            {
                auto *extract_op = allocate(spv::OpCompositeExtract, u32_type);
                extract_op->add_id(load_op->id);
                extract_op->add_literal(start_col);
                add(extract_op);
                store_id = extract_op->id;
            }

            if (actual_element_type != DXIL::ComponentType::U32)
            {
                auto *cast = allocate(spv::OpBitcast, get_type_id(actual_element_type, 1, cols));
                cast->add_id(store_id);
                add(cast);
                store_id = cast->id;
            }

            auto *store_op = allocate(spv::OpStore);
            if (rows > 1)
            {
                auto *store_chain = allocate(
                    spv::OpAccessChain,
                    builder().makePointer(spv::StorageClassOutput, get_type_id(actual_element_type, 1, cols)));
                store_chain->add_id(meta.id);
                store_chain->add_id(builder().makeUintConstant(row));
                add(store_chain);
                store_op->add_id(store_chain->id);
            }
            else
            {
                store_op->add_id(meta.id);
            }

            store_op->add_id(store_id);
            add(store_op);
        }
    }
}

CFGNode *Converter::Impl::build_hull_passthrough_function(CFGNodePool &pool)
{
    // A hull shader may have a null main entry, which indicates that a default passthrough function
    // should be invoked to copy all inputs to the corresponding outputs.
    auto *entry = pool.create_node();

    auto &signature = entry_point_meta->getOperand(2);
    if (!signature)
        return {};
    auto *signature_node = llvm::cast<llvm::MDNode>(signature);

    auto &inputs = signature_node->getOperand(0);
    if (!inputs)
        return {};
    auto *inputs_node = llvm::dyn_cast<llvm::MDNode>(inputs);

    auto &outputs = signature_node->getOperand(1);
    if (!outputs)
        return {};
    auto *outputs_node = llvm::dyn_cast<llvm::MDNode>(outputs);

    if (!inputs_node || !outputs_node)
        return {};

    auto &builder = spirv_module.get_builder();

    // InvocationId is the control point ID used to index into the input arrays.
    auto *load_cipd_op = allocate(spv::OpLoad, builder.makeUintType(32));
    load_cipd_op->add_id(spirv_module.get_builtin_shader_input(spv::BuiltInInvocationId));
    entry->ir.operations.push_back(load_cipd_op);

    unsigned num_entries = std::min(inputs_node->getNumOperands(), outputs_node->getNumOperands());

    // It's a little unclear if we should match by meta entry order, or by row/col.
    // Without any test to prove otherwise, keep it simple.
    for (unsigned i = 0; i < num_entries; i++)
    {
        auto *input = llvm::cast<llvm::MDNode>(inputs_node->getOperand(i));
        auto element_id = get_constant_metadata(input, 0);
        auto actual_element_type =
            normalize_component_type(static_cast<DXIL::ComponentType>(get_constant_metadata(input, 2)));
        auto effective_element_type = get_effective_input_output_type(actual_element_type);
        auto rows = get_constant_metadata(input, 6);
        auto cols = get_constant_metadata(input, 7);
        auto type_id = get_type_id(effective_element_type, rows, cols);

        auto *input_chain = allocate(spv::OpAccessChain, builder.makePointer(spv::StorageClassInput, type_id));
        input_chain->add_id(input_elements_meta[element_id].id);
        input_chain->add_id(load_cipd_op->id);
        entry->ir.operations.push_back(input_chain);

        auto *load_op = allocate(spv::OpLoad, type_id);
        load_op->add_id(input_chain->id);
        entry->ir.operations.push_back(load_op);

        auto *output = llvm::cast<llvm::MDNode>(outputs_node->getOperand(i));
        element_id = get_constant_metadata(output, 0);

        auto *output_chain = allocate(spv::OpAccessChain, builder.makePointer(spv::StorageClassOutput, type_id));
        output_chain->add_id(output_elements_meta[element_id].id);
        output_chain->add_id(load_cipd_op->id);
        entry->ir.operations.push_back(output_chain);

        auto *store_op = allocate(spv::OpStore);
        store_op->add_id(output_chain->id);
        store_op->add_id(load_op->id);
        entry->ir.operations.push_back(store_op);
    }

    entry->ir.terminator.type = Terminator::Type::Return;
    return entry;
}

ConvertedFunction::Function Converter::Impl::build_hull_main(const Vector<llvm::BasicBlock *> &visit_order,
                                                             const Vector<llvm::BasicBlock *> &patch_visit_order,
                                                             CFGNodePool &pool,
                                                             Vector<ConvertedFunction::Function> &leaves)
{
    // Just make sure there is an entry block already created.
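    // The synthesized dispatcher built below is shaped roughly like this (illustrative):
    //   void main()
    //   {
    //       hull_main();                   // Once per control point invocation.
    //       ControlBarrier(Workgroup);     // Only with more than one output control point.
    //       if (InvocationId == 0)
    //           patch_main();              // Patch constants run once per patch.
    //   }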
    spv::Block *hull_entry = nullptr, *patch_entry = nullptr;
    auto *hull_func = builder().makeFunctionEntry(spv::NoPrecision, builder().makeVoidType(),
                                                  "hull_main", {}, {}, &hull_entry);
    auto *patch_func = builder().makeFunctionEntry(spv::NoPrecision, builder().makeVoidType(),
                                                   "patch_main", {}, {}, &patch_entry);

    // Set build point so alloca() functions can create variables correctly.
    if (hull_entry)
        builder().setBuildPoint(hull_entry);

    CFGNode *hull_main = nullptr;
    if (!visit_order.empty())
        hull_main = convert_function(visit_order, true);
    else
        hull_main = build_hull_passthrough_function(pool);

    builder().setBuildPoint(patch_entry);
    auto *patch_main = convert_function(patch_visit_order, false);
    builder().setBuildPoint(spirv_module.get_entry_function()->getEntryBlock());

    if (hull_main)
        leaves.push_back({ hull_main, hull_func });
    leaves.push_back({ patch_main, patch_func });

    auto *entry = pool.create_node();
    Operation *call_op;

    if (hull_func)
    {
        call_op = allocate(spv::OpFunctionCall, builder().makeVoidType());
        call_op->add_id(hull_func->getId());
        entry->ir.operations.push_back(call_op);
    }

    if (execution_mode_meta.stage_output_num_vertex > 1)
    {
        auto *load_op = allocate(spv::OpLoad, builder().makeUintType(32));
        load_op->add_id(spirv_module.get_builtin_shader_input(spv::BuiltInInvocationId));
        entry->ir.operations.push_back(load_op);

        auto *cmp_op = allocate(spv::OpIEqual, builder().makeBoolType());
        cmp_op->add_ids({ load_op->id, builder().makeUintConstant(0) });
        entry->ir.operations.push_back(cmp_op);

        if (hull_main)
        {
            auto *barrier_op = allocate(spv::OpControlBarrier);
            // Not 100% sure what to emit here. Just do what glslang does.
            barrier_op->add_id(builder().makeUintConstant(spv::ScopeWorkgroup));
            if (execution_mode_meta.memory_model == spv::MemoryModelVulkan)
            {
                barrier_op->add_id(builder().makeUintConstant(spv::ScopeWorkgroup));
                barrier_op->add_id(builder().makeUintConstant(
                    spv::MemorySemanticsOutputMemoryMask | spv::MemorySemanticsAcquireReleaseMask));
            }
            else
            {
                barrier_op->add_id(builder().makeUintConstant(spv::ScopeInvocation));
                barrier_op->add_id(builder().makeUintConstant(0));
            }
            entry->ir.operations.push_back(barrier_op);
        }

        auto *patch_block = pool.create_node();
        auto *merge_block = pool.create_node();
        entry->add_branch(patch_block);
        entry->add_branch(merge_block);
        patch_block->add_branch(merge_block);

        entry->ir.terminator.type = Terminator::Type::Condition;
        entry->ir.terminator.true_block = patch_block;
        entry->ir.terminator.false_block = merge_block;
        entry->ir.terminator.conditional_id = cmp_op->id;

        patch_block->ir.terminator.type = Terminator::Type::Branch;
        patch_block->ir.terminator.direct_block = merge_block;

        call_op = allocate(spv::OpFunctionCall, builder().makeVoidType());
        call_op->add_id(patch_func->getId());
        patch_block->ir.operations.push_back(call_op);
        if (execution_mode_meta.patch_lowering_array_var_id)
            emit_patch_output_lowering(patch_block);

        merge_block->ir.terminator.type = Terminator::Type::Return;
    }
    else
    {
        call_op = allocate(spv::OpFunctionCall, builder().makeVoidType());
        call_op->add_id(patch_func->getId());
        entry->ir.operations.push_back(call_op);
        entry->ir.terminator.type = Terminator::Type::Return;
        if (execution_mode_meta.patch_lowering_array_var_id)
            emit_patch_output_lowering(entry);
    }

    return { entry, spirv_module.get_entry_function() };
}

void Converter::Impl::build_function_bb_visit_order_inner_analysis(Vector<llvm::BasicBlock *> &bbs,
                                                                   UnorderedSet<llvm::BasicBlock *> &visited,
                                                                   llvm::BasicBlock *bb)
{
    if (visited.count(bb))
        return;
    visited.insert(bb);

    // Check for the special case where we optimize to a direct branch.
    auto *term = bb->getTerminator();
    if (auto *inst = llvm::dyn_cast<llvm::BranchInst>(term))
    {
        if (inst->isConditional())
        {
            bool cond_value;
            if (can_optimize_conditional_branch_to_static(*this, inst->getCondition(), cond_value))
            {
                auto *succ = inst->getSuccessor(cond_value ? 0 : 1);
                build_function_bb_visit_order_inner_analysis(bbs, visited, succ);
                bbs.push_back(bb);
                return;
            }
        }
    }

    for (auto itr = llvm::succ_begin(bb); itr != llvm::succ_end(bb); ++itr)
    {
        auto *succ = *itr;
        build_function_bb_visit_order_inner_analysis(bbs, visited, succ);
    }

    bbs.push_back(bb);
}

Vector<llvm::BasicBlock *> Converter::Impl::build_function_bb_visit_order_analysis(llvm::Function *func)
{
    if (!func)
        return {};

    UnorderedSet<llvm::BasicBlock *> visited;
    Vector<llvm::BasicBlock *> visit_order;
    auto *entry = &func->getEntryBlock();
    build_function_bb_visit_order_inner_analysis(visit_order, visited, entry);

    // Get the natural traversal order; the input is post-order.
    std::reverse(visit_order.begin(), visit_order.end());
    return visit_order;
}

void Converter::Impl::build_function_bb_visit_register(llvm::BasicBlock *bb, CFGNodePool &pool, String tag)
{
    auto entry_meta = std::make_unique<BlockMeta>(bb);
    bb_map[bb] = entry_meta.get();
    auto *entry_node = pool.create_node();
    bb_map[bb]->node = entry_node;
    entry_node->name = std::move(tag);
    metas.push_back(std::move(entry_meta));
}

// This only exists so that we can avoid nuking all existing Fossilize caches with completely new shaders.
// This traversal order is not a perfect reverse post-order traversal,
// so we cannot use it for analysis with alloca() -> CBV forwarding checks.
// Once we are ready to consider doing large scale SPIR-V changes that invalidate all caches anyway,
// we might as well get rid of this path in the same update and use the common analysis path.
Vector<llvm::BasicBlock *> Converter::Impl::build_function_bb_visit_order_legacy(llvm::Function *func,
                                                                                 CFGNodePool &pool)
{
    if (!func)
        return {};

    auto *entry = &func->getEntryBlock();
    build_function_bb_visit_register(entry, pool, ".entry");

    Vector<llvm::BasicBlock *> to_process;
    Vector<llvm::BasicBlock *> processing;
    to_process.push_back(entry);
    Vector<llvm::BasicBlock *> visit_order;
    unsigned fake_label_id = 0;

    const auto queue_visit_succ = [&](llvm::BasicBlock *block, llvm::BasicBlock *succ) {
        if (!bb_map.count(succ))
        {
            to_process.push_back(succ);
            build_function_bb_visit_register(succ, pool, dxil_spv::to_string(++fake_label_id));
        }
        bb_map[block]->node->add_branch(bb_map[succ]->node);
    };

    // Traverse the CFG and register all blocks in the pool.
    while (!to_process.empty())
    {
        std::swap(to_process, processing);
        for (auto *block : processing)
        {
            visit_order.push_back(block);
            auto *term = block->getTerminator();
            if (auto *inst = llvm::dyn_cast<llvm::BranchInst>(term))
            {
                if (inst->isConditional())
                {
                    bool cond_value;
                    if (can_optimize_conditional_branch_to_static(*this, inst->getCondition(), cond_value))
                    {
                        auto *succ = inst->getSuccessor(cond_value ? 0 : 1);
                        queue_visit_succ(block, succ);
                        continue;
                    }
                }
            }

            for (auto itr = llvm::succ_begin(block); itr != llvm::succ_end(block); ++itr)
                queue_visit_succ(block, *itr);
        }
        processing.clear();
    }

    return visit_order;
}

void Converter::Impl::emit_write_instrumentation_invocation_id(CFGNode *node)
{
    current_block = &node->ir.operations;
    spv::Id alloc_id = spirv_module.get_helper_call_id(HelperCall::AllocateInvocationID);
    auto *call = allocate(spv::OpFunctionCall, builder().makeUintType(32));
    call->add_id(alloc_id);
    add(call);

    auto *store = allocate(spv::OpStore);
    store->add_id(instrumentation.invocation_id_var_id);
    store->add_id(call->id);
    add(store);
}

void Converter::Impl::gather_function_dependencies(llvm::Function *caller, Vector<llvm::Function *> &funcs)
{
    if (std::find(funcs.begin(), funcs.end(), caller) != funcs.end())
        return;

    // Avoid exponential explosion while traversing.
    funcs.push_back(caller);

    for (auto &bb : *caller)
    {
        for (auto &inst : bb)
        {
            if (const auto *call_inst = llvm::dyn_cast<llvm::CallInst>(&inst))
            {
                auto *fn = call_inst->getCalledFunction();
                if (strncmp(fn->getName().data(), "dx.op", 5) != 0 &&
                    strncmp(fn->getName().data(), "llvm.", 5) != 0)
                {
                    gather_function_dependencies(fn, funcs);
                }
            }
        }
    }

    // Ensure leaves come before their caller.
    funcs.erase(std::find(funcs.begin(), funcs.end(), caller));
    funcs.push_back(caller);
}

bool Converter::Impl::build_callee_functions(CFGNodePool &pool, const Vector<llvm::Function *> &callees,
                                             Vector<ConvertedFunction::Function> &leaves)
{
    llvm::Function *func = get_entry_point_function(entry_point_meta);

    for (auto *leaf_func : callees)
    {
        if (leaf_func == func || leaf_func == execution_mode_meta.patch_constant_function)
            continue;

        Vector<spv::Id> arg_types;
        spv::Block *spv_entry;

        // Cannot safely use function-local undefs now.
        shader_analysis.global_undefs = true;

        arg_types.reserve(leaf_func->getFunctionType()->getNumParams());
        for (uint32_t i = 0; i < leaf_func->getFunctionType()->getNumParams(); i++)
            arg_types.push_back(get_type_id(leaf_func->getFunctionType()->getParamType(i)));

        auto *spv_func = builder().makeFunctionEntry(spv::NoPrecision,
                                                     get_type_id(leaf_func->getFunctionType()->getReturnType()),
#ifdef HAVE_LLVMBC
                                                     leaf_func->getName().c_str(),
#else
                                                     leaf_func->getName().str().c_str(),
#endif
                                                     arg_types, {}, &spv_entry);

        rewrite_value(leaf_func, spv_func->getId());

        auto arg_iter = leaf_func->arg_begin();
        for (uint32_t i = 0, n = leaf_func->getFunctionType()->getNumParams(); i < n; i++, ++arg_iter)
            rewrite_value(&*arg_iter, spv_func->getParamId(i));

        auto visit_order = build_function_bb_visit_order_analysis(leaf_func);
        for (auto *bb : visit_order)
            build_function_bb_visit_register(bb, pool, "");
        for (auto *bb : visit_order)
            for (auto itr = llvm::succ_begin(bb); itr != llvm::succ_end(bb); ++itr)
                bb_map[bb]->node->add_branch(bb_map[*itr]->node);

        builder().setBuildPoint(spv_entry);
        auto *entry = convert_function(visit_order, false);
        if (!entry)
            return false;
        leaves.push_back({ entry, spv_func });
    }

    builder().setBuildPoint(spirv_module.get_entry_function()->getEntryBlock());
    return true;
}

CFGNode *Converter::Impl::convert_function(const Vector<llvm::BasicBlock *> &visit_order, bool primary_code)
{
    bool has_partial_unroll = false;

    for (auto *bb : visit_order)
    {
        auto *meta = bb_map[bb];
        CFGNode *node = meta->node;
        combined_image_sampler_cache.clear();
        peephole_transformation_cache.clear();
        memoized = {};

        if (bb == visit_order.front())
        {
            current_block = &node->ir.operations;
            if (!emit_view_masking(*this))
                return {};
            if (!emit_view_instancing_fixed_layer_viewport(*this, true))
                return {};
            if (instrumentation.invocation_id_var_id && primary_code)
                emit_write_instrumentation_invocation_id(node);
        }

        auto sink_itr = bb_to_sinks.find(bb);
        if (sink_itr != bb_to_sinks.end())
        {
            for (auto *instruction : sink_itr->second)
            {
                auto itr = value_map.find(instruction);
                if (itr != value_map.end())
                    value_map.erase(itr);

                if (!emit_instruction(node, *instruction))
                {
                    LOGE("Failed to emit instruction.\n");
                    return {};
                }
            }
        }

        // Scan opcodes.
        for (auto &instruction : *bb)
        {
            if (!emit_instruction(node, instruction))
            {
                LOGE("Failed to emit instruction.\n");
                return {};
            }
        }

        ags.reset();
        nvapi.reset();

        // We don't know if the block is a loop yet, so just tag every BB.
        // CFG will propagate the information as necessary.
        node->ir.terminator.force_flatten = options.branch_control.force_flatten;
        node->ir.terminator.force_branch = options.branch_control.force_branch;
        node->ir.terminator.force_unroll = options.branch_control.force_unroll;
        node->ir.terminator.force_loop = options.branch_control.force_loop;

        auto *instruction = bb->getTerminator();
        if (auto *inst = llvm::dyn_cast<llvm::BranchInst>(instruction))
        {
            // Loop information is attached to the back edge in LLVM.
            // Continue blocks can be direct branches or conditional ones, so make it generic.
            auto *loop_meta = instruction->getMetadata("llvm.loop");
            if (loop_meta && loop_meta->getNumOperands() >= 2)
            {
                auto *meta_node = llvm::dyn_cast<llvm::MDNode>(loop_meta->getOperand(1));
                if (meta_node)
                {
                    auto *meta_name = llvm::dyn_cast<llvm::MDString>(meta_node->getOperand(0));
                    if (meta_name)
                    {
#ifdef HAVE_LLVMBC
                        auto &str = meta_name->getString();
#else
                        auto str = meta_name->getString();
#endif
                        if (options.branch_control.use_shader_metadata)
                        {
                            if (str == "llvm.loop.unroll.disable")
                            {
                                node->ir.terminator.force_loop = true;
                                node->ir.terminator.force_unroll = false;
                            }
                            else if (str == "llvm.loop.unroll.full")
                            {
                                node->ir.terminator.force_unroll = true;
                                node->ir.terminator.force_loop = false;
                            }
                        }

                        if (str == "llvm.loop.unroll.count")
                            has_partial_unroll = true;
                    }
                }
            }

            if (inst->isConditional())
            {
                // Works around some pathological unrolling scenarios where games may unroll
                // based on WaveGetLaneCount().
                bool cond_value;
                if (can_optimize_conditional_branch_to_static(*this, inst->getCondition(), cond_value))
                {
                    node->ir.terminator.type = Terminator::Type::Branch;
                    node->ir.terminator.direct_block = bb_map[inst->getSuccessor(cond_value ? 0 : 1)]->node;
                }
                else
                {
                    node->ir.terminator.type = Terminator::Type::Condition;
                    node->ir.terminator.conditional_id = get_id_for_value(inst->getCondition());
                    assert(inst->getNumSuccessors() == 2);
                    node->ir.terminator.true_block = bb_map[inst->getSuccessor(0)]->node;
                    node->ir.terminator.false_block = bb_map[inst->getSuccessor(1)]->node;

                    if (options.branch_control.use_shader_metadata)
                    {
                        auto *branch_meta = inst->getMetadata("dx.controlflow.hints");
                        if (branch_meta && branch_meta->getNumOperands() >= 3)
                        {
                            if (get_constant_metadata(branch_meta, 2) == 1)
                            {
                                node->ir.terminator.force_branch = true;
                                node->ir.terminator.force_flatten = false;
                            }
                            else if (get_constant_metadata(branch_meta, 2) == 2)
                            {
                                node->ir.terminator.force_flatten = true;
                                node->ir.terminator.force_branch = false;
                            }
                        }
                    }
                }
            }
            else
            {
                node->ir.terminator.type = Terminator::Type::Branch;
                assert(inst->getNumSuccessors() == 1);
                node->ir.terminator.direct_block = bb_map[inst->getSuccessor(0)]->node;

                // If the shader uses partial unrolling, but we see loops anyway,
                // it's very likely we really want this to be a loop.
                // This is somewhat of a hack heuristic to work around a Mesa bug in Lords of the Fallen,
                // but it makes at least some sense ...
                if (has_partial_unroll)
                    node->ir.terminator.force_loop = true;
            }
        }
        else if (auto *inst = llvm::dyn_cast<llvm::SwitchInst>(instruction))
        {
            node->ir.terminator.type = Terminator::Type::Switch;

            Terminator::Case default_case = {};
            default_case.is_default = true;
            default_case.node = bb_map[inst->getDefaultDest()]->node;
            node->ir.terminator.cases.push_back(default_case);
            node->ir.terminator.conditional_id = get_id_for_value(inst->getCondition());

            for (auto itr = inst->case_begin(); itr != inst->case_end(); ++itr)
            {
                Terminator::Case switch_case = {};
                switch_case.node = bb_map[itr->getCaseSuccessor()]->node;
                switch_case.value = uint32_t(itr->getCaseValue()->getUniqueInteger().getZExtValue());
                node->ir.terminator.cases.push_back(switch_case);
            }
        }
        else if (auto *inst = llvm::dyn_cast<llvm::ReturnInst>(instruction))
        {
            node->ir.terminator.type = Terminator::Type::Return;
            if (inst->getReturnValue())
                node->ir.terminator.return_value = get_id_for_value(inst->getReturnValue());
        }
        else if (llvm::isa<llvm::UnreachableInst>(instruction))
        {
            node->ir.terminator.type = Terminator::Type::Unreachable;
        }
        else
        {
            LOGE("Unsupported terminator ...\n");
            return {};
        }

#ifdef HAVE_LLVMBC
        // Forward structured control flow.
        if (bb->get_merge() == llvm::BasicBlock::Merge::Selection)
        {
            node->ir.merge_info.merge_type = MergeType::Selection;
            // Assume both paths can return or break, leaving the merge unreachable.
            if (bb->get_merge_bb() && bb_map.count(bb->get_merge_bb()))
                node->ir.merge_info.merge_block = bb_map[bb->get_merge_bb()]->node;
        }
        else if (bb->get_merge() == llvm::BasicBlock::Merge::Loop)
        {
            node->ir.merge_info.merge_type = MergeType::Loop;
            // In infinite loops, the merge block may be unreachable.
            if (bb->get_merge_bb() && bb_map.count(bb->get_merge_bb()))
                node->ir.merge_info.merge_block = bb_map[bb->get_merge_bb()]->node;
            // If the back edge is not reachable, we'll resolve that later.
            if (bb->get_continue_bb() && bb_map.count(bb->get_continue_bb()))
                node->ir.merge_info.continue_block = bb_map[bb->get_continue_bb()]->node;
        }
#endif
    }

    // Rewrite PHI incoming values if we have to.
    if (!phi_incoming_rewrite.empty())
    {
        for (auto *bb : visit_order)
        {
            CFGNode *node = bb_map[bb]->node;
            for (auto &phi : node->ir.phi)
            {
                for (auto &incoming : phi.incoming)
                {
                    auto itr = phi_incoming_rewrite.find(incoming.id);
                    if (itr != phi_incoming_rewrite.end())
                        incoming.id = itr->second;
                }
            }
        }
    }

    return bb_map[visit_order.front()]->node;
}

void Converter::Impl::mark_used_value(const llvm::Value *value)
{
    if (!llvm::isa<llvm::Constant>(value))
    {
        // Technically, we won't be able to eliminate a chain of SSA expressions
        // which are unused this way, but eeeeeeh. DXC really should handle that.
        // This is to deal with odd-ball edge cases where random single SSA instructions
        // were not eliminated for whatever reason.
        llvm_used_ssa_values.insert(value);
    }
}

void Converter::Impl::mark_used_values(const llvm::Instruction *instruction)
{
    if (auto *phi_inst = llvm::dyn_cast<llvm::PHINode>(instruction))
    {
        for (unsigned i = 0, n = phi_inst->getNumIncomingValues(); i < n; i++)
        {
            auto *incoming = phi_inst->getIncomingValue(i);
            // Ignore self-referential PHI. Someone else needs to refer to us.
            if (incoming != phi_inst)
                mark_used_value(incoming);
        }
    }
    else if (const auto *ret_inst = llvm::dyn_cast<llvm::ReturnInst>(instruction))
    {
        if (ret_inst->getReturnValue())
            mark_used_value(ret_inst->getReturnValue());
    }
    else if (const auto *cond_inst = llvm::dyn_cast<llvm::BranchInst>(instruction))
    {
        if (cond_inst->isConditional())
            mark_used_value(cond_inst->getCondition());
    }
    else if (const auto *switch_inst = llvm::dyn_cast<llvm::SwitchInst>(instruction))
    {
        mark_used_value(switch_inst->getCondition());
    }
    else
    {
        for (unsigned i = 0, n = instruction->getNumOperands(); i < n; i++)
            mark_used_value(instruction->getOperand(i));
    }
}

static bool instruction_is_precise_sensitive(const llvm::Instruction *value)
{
    if (auto *binary_op = llvm::dyn_cast<llvm::BinaryOperator>(value))
    {
        auto opcode = binary_op->getOpcode();
        switch (opcode)
        {
        case llvm::BinaryOperator::BinaryOps::FAdd:
        case llvm::BinaryOperator::BinaryOps::FSub:
        case llvm::BinaryOperator::BinaryOps::FMul:
        case llvm::BinaryOperator::BinaryOps::FDiv:
        case llvm::BinaryOperator::BinaryOps::FRem:
            return true;

        default:
            break;
        }
    }
    else if (value_is_dx_op_instrinsic(value, DXIL::Op::FMad) ||
             value_is_dx_op_instrinsic(value, DXIL::Op::Dot2) ||
             value_is_dx_op_instrinsic(value, DXIL::Op::Dot2AddHalf) ||
             value_is_dx_op_instrinsic(value, DXIL::Op::Dot3) ||
             value_is_dx_op_instrinsic(value, DXIL::Op::Dot4))
    {
        return true;
    }

    return false;
}

static bool instruction_requires_no_contraction(const llvm::Instruction *value)
{
    if (instruction_is_precise_sensitive(value))
    {
        if (auto *binary_op = llvm::dyn_cast<llvm::BinaryOperator>(value))
            return !binary_op->isFast();
        else
            return llvm::cast<llvm::CallInst>(value)->hasMetadata("dx.precise");
    }

    return false;
}

static void propagate_precise(UnorderedSet<const llvm::Instruction *> &cache, const llvm::Instruction *value);

static void mark_precise(UnorderedSet<const llvm::Instruction *> &cache, const llvm::Value *value)
{
    // Stop propagating when we hit something that is not an instruction,
    // i.e. a constant or variable (alloca is very rare).
    if (auto *inst = llvm::dyn_cast<llvm::Instruction>(value))
    {
        if (instruction_is_precise_sensitive(inst) && !instruction_requires_no_contraction(inst))
        {
            if (auto *call_inst = llvm::dyn_cast<llvm::CallInst>(inst))
                const_cast<llvm::CallInst *>(call_inst)->setMetadata("dx.precise", nullptr);
            else if (auto *binary_op = llvm::dyn_cast<llvm::BinaryOperator>(inst))
                const_cast<llvm::BinaryOperator *>(binary_op)->setFast(false);
        }

        propagate_precise(cache, inst);
    }
}

static void propagate_precise(UnorderedSet<const llvm::Instruction *> &cache, const llvm::Instruction *value)
{
    if (cache.count(value) != 0)
        return;
    cache.insert(value);

    if (const auto *phi = llvm::dyn_cast<llvm::PHINode>(value))
    {
        for (unsigned i = 0, n = phi->getNumIncomingValues(); i < n; i++)
            mark_precise(cache, phi->getIncomingValue(i));
    }
    else
    {
        for (unsigned i = 0, n = value->getNumOperands(); i < n; i++)
            mark_precise(cache, value->getOperand(i));
    }
}

static void propagate_precise(llvm::Function *func)
{
    Vector<const llvm::Instruction *> precise_instructions;
    for (auto &bb : *func)
        for (auto &inst : bb)
            if (instruction_requires_no_contraction(&inst))
                precise_instructions.push_back(&inst);

    UnorderedSet<const llvm::Instruction *> visitation_cache;
    for (auto *inst : precise_instructions)
        propagate_precise(visitation_cache, inst);
}

void Converter::Impl::analyze_instructions_post_execution_modes()
{
    if ((options.quirks.group_shared_auto_barrier || !shader_analysis.has_group_shared_barrier) &&
        shader_analysis.has_group_shared_access)
    {
        unsigned num_threads = execution_mode_meta.workgroup_threads[0] *
                               execution_mode_meta.workgroup_threads[1] *
                               execution_mode_meta.workgroup_threads[2];
        if (options.quirks.group_shared_auto_barrier || (num_threads <= 32 && num_threads > 1))
        {
            // This is a case that might just happen to work if the game assumes lock-step execution
            // on NV + AMD (rip Intel).
            // If the group size is larger, it's extremely unlikely the game "just works"
            // by chance on native drivers.
            // Some shaders seem to use groupshared as a sort of "scratch space" per thread,
            // which is a valid use case and does not require barriers to be correct.
            shader_analysis.needs_auto_group_shared_barriers = true;
        }
    }
}

bool Converter::Impl::analyze_instructions(llvm::Function *func)
{
    // Need to analyze this in two stages.
    // In the first stage, we need to analyze:
    // - Load/GetElementPtr to handle lib global variables
    // - CreateHandle family to build LLVM access handles
    // - ExtractValue to track which components are used for BufferLoad.
    // In the second phase we analyze the buffer loads and stores and figure out
    // alignments of the loads and stores. This lets us build up a list of SSBO declarations we need to
    // optimally implement the loads and stores. We need to do this late, because we depend on results
    // of the ExtractValue analysis.

    if (func && options.propagate_precise && !options.force_precise)
        propagate_precise(func);

    auto visit_order = build_function_bb_visit_order_analysis(func);

    for (auto *bb : visit_order)
    {
        if (options.eliminate_dead_code)
            mark_used_values(bb->getTerminator());

        for (auto &inst : *bb)
        {
            if (options.eliminate_dead_code)
                mark_used_values(&inst);

            if (auto *load_inst = llvm::dyn_cast<llvm::LoadInst>(&inst))
            {
                if (!analyze_load_instruction(*this, load_inst))
                    return false;
            }
            else if (auto *store_inst = llvm::dyn_cast<llvm::StoreInst>(&inst))
            {
                if (!analyze_store_instruction(*this, store_inst))
                    return false;
            }
            else if (auto *phi_inst = llvm::dyn_cast<llvm::PHINode>(&inst))
            {
                if (!analyze_phi_instruction(*this, phi_inst))
                    return false;
            }
            else if (auto *atomicrmw_inst = llvm::dyn_cast<llvm::AtomicRMWInst>(&inst))
            {
                if (!analyze_atomicrmw_instruction(*this, atomicrmw_inst))
                    return false;
            }
            else if (auto *cmpxchg_inst = llvm::dyn_cast<llvm::AtomicCmpXchgInst>(&inst))
            {
                if (!analyze_cmpxchg_instruction(*this, cmpxchg_inst))
                    return false;
            }
            else if (auto *alloca_inst = llvm::dyn_cast<llvm::AllocaInst>(&inst))
            {
                if (!analyze_alloca_instruction(*this, alloca_inst))
                    return false;
            }
            else if (auto *getelementptr_inst = llvm::dyn_cast<llvm::GetElementPtrInst>(&inst))
            {
                if (!analyze_getelementptr_instruction(*this, getelementptr_inst))
                    return false;
            }
            else if (auto *extractvalue_inst = llvm::dyn_cast<llvm::ExtractValueInst>(&inst))
            {
                if (!analyze_extractvalue_instruction(*this, extractvalue_inst))
                    return false;
            }
            else if (auto *cmp_inst = llvm::dyn_cast<llvm::CmpInst>(&inst))
            {
                if (!analyze_compare_instruction(*this, cmp_inst))
                    return false;
            }
            else if (auto *call_inst = llvm::dyn_cast<llvm::CallInst>(&inst))
            {
                auto *called_function = call_inst->getCalledFunction();
                if (strncmp(called_function->getName().data(), "dx.op", 5) == 0)
                {
                    if (!analyze_dxil_instruction_primary_pass(*this, call_inst, bb))
                        return false;
                }
            }
        }

        // Reset vendor tracking for every BB.
        ags.reset();
        nvapi.reset();
    }

    for (auto *bb : visit_order)
    {
        for (auto &inst : *bb)
        {
            if (auto *call_inst = llvm::dyn_cast<llvm::CallInst>(&inst))
            {
                auto *called_function = call_inst->getCalledFunction();
                if (strncmp(called_function->getName().data(), "dx.op", 5) == 0)
                {
                    if (!analyze_dxil_instruction_secondary_pass(*this, call_inst))
                        return false;
                }
            }
        }

        // Reset vendor tracking for every BB.
        ags.reset();
        nvapi.reset();
    }

    for (auto &alloc : alloca_tracking)
    {
        // Mark required resource aliases before we emit resources.
        // Defer some work until after resource creation.
        const auto *scalar_type = alloc.first->getType()->getPointerElementType()->getArrayElementType();
        if (!analyze_alloca_cbv_forwarding_pre_resource_emit(*this, scalar_type, alloc.second))
            return false;
    }

    ags.reset_analysis();
    nvapi.reset_analysis();

    if (shader_analysis.require_wmma)
        execution_mode_meta.memory_model = spv::MemoryModelVulkan;

    return true;
}

bool Converter::Impl::composite_is_accessed(const llvm::Value *composite) const
{
    return llvm_composite_meta.find(composite) != llvm_composite_meta.end();
}

ConvertedFunction Converter::Impl::convert_entry_point()
{
    ConvertedFunction result = {};
    auto &module = bitcode_parser.get_module();
    entry_point_meta = get_entry_point_meta(module, options.entry_point.empty() ? nullptr : options.entry_point.c_str());
    execution_model = get_execution_model(module, entry_point_meta);
    execution_model_lib_target = get_execution_model_lib_target(module, entry_point_meta);

    if (execution_model_lib_target && execution_model == spv::ExecutionModelGLCompute)
    {
        // Might as well go with SPIR-V 1.6. Then we get subgroup size control semantics for "free".
        // When we're willing to do a clean break with Fossilize, all shaders should target SPIR-V 1.6.
        spirv_module.set_override_spirv_version(0x10600);
    }
    else if (execution_model == spv::ExecutionModelFragment && resource_mapping_iface &&
             resource_mapping_iface->has_nontrivial_stage_input_remapping())
    {
        // Force SPIR-V 1.4 for fragment shaders if we might end up requiring mesh shader capabilities.
        // Non-trivial stage input remapping may require the PerPrimitiveEXT decoration.
        spirv_module.set_override_spirv_version(0x10400);
    }

    if (!entry_point_meta)
    {
        if (!options.entry_point.empty())
            LOGE("Could not find entry point \"%s\".\n", options.entry_point.c_str());
        else
            LOGE("Could not find any entry point.\n");
        return result;
    }

    if (!options.shader_source_file.empty())
    {
        auto &builder = spirv_module.get_builder();
        uint32_t sm_major = 0, sm_minor = 0;
        get_shader_model(module, nullptr, &sm_major, &sm_minor);
        builder.setSource(spv::SourceLanguageUnknown, sm_major * 100 + sm_minor);
        builder.setSourceFile(options.shader_source_file);
    }

    result.node_pool = std::make_unique<CFGNodePool>();
    auto &pool = *result.node_pool;

    bool need_bda = options.physical_storage_buffer ||
                    (execution_model_lib_target && execution_model == spv::ExecutionModelGLCompute);

    spirv_module.set_descriptor_qa_info(options.descriptor_qa);
    options.instruction_instrumentation.fp16 =
        options.min_precision_prefer_native_16bit || execution_mode_meta.native_16bit_operations;
    spirv_module.set_instruction_instrumentation_info(options.instruction_instrumentation);

    llvm::Function *func = get_entry_point_function(entry_point_meta);
    auto visit_order = build_function_bb_visit_order_legacy(func, pool);
    Vector<llvm::BasicBlock *> patch_visit_order;

    // dxilconv emits somewhat broken code for min16float resource access.
    // Just use FP32 here since that's what we've tested, and it avoids lots of awkward workarounds.
    if (module_is_dxilconv(module))
        options.min_precision_prefer_native_16bit = false;

    if (module_is_dxbc_spirv(module))
    {
        backend.skip_non_uniform_promotion = true;
        // This is new code, might as well exercise it.
        execution_mode_meta.memory_model = spv::MemoryModelVulkan;
    }

    // Need to analyze some execution modes early which affect opcode analysis later.
	if (!analyze_execution_modes_meta())
		return result;

	if (!emit_resources_global_mapping())
		return result;
	if (!analyze_instructions(func))
		return result;

	spirv_module.emit_entry_point(get_execution_model(module, entry_point_meta), "main", need_bda,
	                              execution_mode_meta.memory_model);

	if (!emit_execution_modes())
		return result;

	if (execution_mode_meta.patch_constant_function)
		patch_visit_order = build_function_bb_visit_order_legacy(execution_mode_meta.patch_constant_function, pool);

	Vector<llvm::Function *> callees;
	if (func)
		gather_function_dependencies(func, callees);
	if (execution_mode_meta.patch_constant_function)
		gather_function_dependencies(execution_mode_meta.patch_constant_function, callees);

	// Analyze all leaf functions.
	for (auto *leaf_func : callees)
		if (leaf_func != func && !analyze_instructions(leaf_func))
			return result;

	if (!emit_resources())
		return result;
	if (!emit_stage_input_variables())
		return result;
	if (!emit_stage_output_variables())
		return result;
	if (!emit_patch_variables())
		return result;
	if (!emit_other_variables())
		return result;
	if (!emit_global_variables())
		return result;

	if (options.extended_non_semantic_info)
		for (auto &info : non_semantic_debug_info)
			emit_non_semantic_debug_info(info);

	// Some execution modes depend on other execution modes, so handle that here.
	if (!emit_execution_modes_late())
		return result;

	analyze_instructions_post_execution_modes();

	execution_mode_meta.entry_point_name = get_entry_point_name(entry_point_meta);

	if (!build_callee_functions(pool, callees, result.leaf_functions))
		return result;

	if (execution_model == spv::ExecutionModelTessellationControl)
		result.entry = build_hull_main(visit_order, patch_visit_order, pool, result.leaf_functions);
	else if (execution_mode_meta.declares_rov)
		result.entry = build_rov_main(visit_order, pool, result.leaf_functions);
	else if (execution_model_lib_target && execution_model == spv::ExecutionModelGLCompute)
		result.entry = build_node_main(visit_order, pool, result.leaf_functions);
	else
	{
		result.entry.entry = convert_function(visit_order, true);

		if (shader_analysis.needs_auto_group_shared_barriers && options.quirks.group_shared_auto_barrier)
		{
			CFGStructurizer cfg{result.entry.entry, pool, spirv_module};
			cfg.rewrite_auto_group_shared_barrier();
		}

		if (shader_analysis.require_subgroup_shuffles)
		{
			CFGStructurizer cfg{result.entry.entry, pool, spirv_module};
			cfg.flatten_subgroup_shuffles();
		}

		if (options.quirks.fixup_loop_header_undef_phis)
		{
			CFGStructurizer cfg{result.entry.entry, pool, spirv_module};
			cfg.fixup_loop_header_undef_phis();
		}

		result.entry.func = spirv_module.get_entry_function();
	}

#ifdef HAVE_LLVMBC
	if (func && func->get_structured_control_flow())
	{
		// For TESC, the entry is a custom dispatch function.
		result.entry.is_structured = execution_model != spv::ExecutionModelTessellationControl;
		for (auto &leaf : result.leaf_functions)
			leaf.is_structured = true;
	}
#endif

	// Some execution modes depend on code generation, handle that here.
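	// e.g. (hedged): whether a denorm or reconvergence execution mode is actually emitted
	// may only be decidable once we know which opcodes were generated above.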
	emit_execution_modes_post_code_generation();
	return result;
}

Operation *Converter::Impl::allocate(spv::Op op)
{
	return spirv_module.allocate_op(op);
}

Operation *Converter::Impl::allocate(spv::Op op, spv::Id id, spv::Id type_id)
{
	assert(type_id != 0);
	assert(id != 0);
	return spirv_module.allocate_op(op, id, type_id);
}

Operation *Converter::Impl::allocate(spv::Op op, spv::Id type_id)
{
	assert(type_id != 0);
	return spirv_module.allocate_op(op, spirv_module.allocate_id(), type_id);
}

Operation *Converter::Impl::allocate(spv::Op op, const llvm::Value *value)
{
	// Constant expressions cannot have an associated opcode ID to them.
	assert(!llvm::isa<llvm::ConstantExpr>(value));
	return spirv_module.allocate_op(op, get_id_for_value(value), get_type_id(value->getType()));
}

Operation *Converter::Impl::allocate(spv::Op op, const llvm::Value *value, spv::Id type_id)
{
	// Constant expressions cannot have an associated opcode ID to them.
	assert(!llvm::isa<llvm::ConstantExpr>(value));
	assert(type_id != 0);
	return spirv_module.allocate_op(op, get_id_for_value(value), type_id);
}

void Converter::Impl::rewrite_value(const llvm::Value *value, spv::Id id)
{
	auto value_itr = value_map.find(value);
	if (value_itr != value_map.end())
	{
		if (value_itr->second != id)
		{
			// If a PHI node previously accessed the value ID map, it will now refer to a dead
			// ID. Remember to rewrite PHI incoming nodes as necessary.
			phi_incoming_rewrite[value_itr->second] = id;
			value_itr->second = id;
		}
	}
	else
		value_map[value] = id;
}

void Converter::Impl::add(Operation *op, bool is_rov)
{
	assert(current_block);
	if (is_rov)
		current_block->push_back(allocate(spv::OpBeginInvocationInterlockEXT));
	current_block->push_back(op);
	if (is_rov)
		current_block->push_back(allocate(spv::OpEndInvocationInterlockEXT));
}

void Converter::Impl::register_externally_visible_write(const llvm::Value *value)
{
	if (!options.instruction_instrumentation.enabled ||
	    options.instruction_instrumentation.type != InstructionInstrumentationType::ExternallyVisibleWriteNanInf)
		return;

	// Ignore undefs and intentional nan/inf writes.
	// (Undef values and constant NaN/Inf literals are both llvm::Constant.)
	if (llvm::isa<llvm::Constant>(value))
		return;

	// Punch through any bitcasts.
	// Sometimes, shaders want to store floats as uints for practical reasons.
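	// Hedged illustration (hypothetical HLSL): rwbuf.Store(offset, asuint(f)) arrives here
	// as a BitCast from float to i32; peeling the casts lets the instrumentation observe
	// the float value that is actually being made externally visible.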
	while (llvm::isa<llvm::CastInst>(value))
	{
		auto *cast = llvm::cast<llvm::CastInst>(value);
		if (cast->getOpcode() == llvm::CastInst::CastOps::BitCast)
			value = cast->getOperand(0);
		else
			break;
	}

	switch (value->getType()->getTypeID())
	{
	case llvm::Type::TypeID::HalfTyID:
	case llvm::Type::TypeID::FloatTyID:
	case llvm::Type::TypeID::DoubleTyID:
	{
		auto *op = allocate(spv::PseudoOpInstrumentExternallyVisibleStore);
		op->add_id(get_id_for_value(value));
		add(op);
		break;
	}

	default:
		break;
	}
}

spv::Builder &Converter::Impl::builder()
{
	return spirv_module.get_builder();
}

spv::Id Converter::Impl::create_variable(spv::StorageClass storage, spv::Id type_id, const char *name)
{
	return spirv_module.create_variable(storage, type_id, name);
}

spv::Id Converter::Impl::create_variable_with_initializer(spv::StorageClass storage, spv::Id type_id,
                                                          spv::Id initializer, const char *name)
{
	return spirv_module.create_variable_with_initializer(storage, type_id, initializer, name);
}

spv::StorageClass Converter::Impl::get_effective_storage_class(const llvm::Value *value,
                                                               spv::StorageClass fallback) const
{
	auto itr = handle_to_storage_class.find(value);
	if (itr != handle_to_storage_class.end())
		return itr->second;
	else
		return fallback;
}

bool Converter::Impl::get_needs_temp_storage_copy(const llvm::Value *value) const
{
	// We always need a temp storage copy if this isn't
	// directly the result of an alloca instruction.
	if (!llvm::dyn_cast<llvm::AllocaInst>(value))
		return true;

	// We'll also need a temp storage copy if this
	// alloca is directly referenced by
	// a TraceRay AND a CallShader.
	return needs_temp_storage_copy.count(value) != 0;
}

spv::Id Converter::Impl::get_temp_payload(spv::Id type, spv::StorageClass storage)
{
	for (const auto &temp_payload : temp_payloads)
	{
		if (temp_payload.type == type && temp_payload.storage == storage)
			return temp_payload.id;
	}

	spv::Id var_id = create_variable(storage, type);
	temp_payloads.push_back(TempPayloadEntry{ type, storage, var_id });
	return var_id;
}

DXIL::ComponentType Converter::Impl::get_effective_typed_resource_type(DXIL::ComponentType type)
{
	// Expand/contract on load/store.
	// DXIL can emit half textures for example,
	// but we need to contract or expand instead.
	return convert_16bit_component_to_32bit(type);
}

DXIL::ComponentType Converter::Impl::get_effective_input_output_type(DXIL::ComponentType type)
{
	bool supports_narrow_arith_type = type != DXIL::ComponentType::F16 || support_native_fp16_operations();
	if (options.storage_16bit_input_output && supports_narrow_arith_type)
	{
		if (component_type_is_16bit(type))
			builder().addCapability(spv::CapabilityStorageInputOutput16);
	}
	else
	{
		// Expand/contract on load/store.
		// The only reasonable way this can break is if application relies on
		// lower precision in interpolation, but I don't think you can rely on that
		// kind of implementation detail ...
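		// Hedged example: a min16float varying declared as F16 widens to F32 here and is
		// consumed as F32 by the next stage; only interpolation precision could observably differ.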
		type = convert_16bit_component_to_32bit(type);
	}
	return type;
}

spv::Id Converter::Impl::get_effective_input_output_type_id(DXIL::ComponentType type)
{
	return get_type_id(get_effective_input_output_type(type), 1, 1);
}

bool Converter::Impl::type_can_relax_precision(const llvm::Type *type, bool known_integer_sign) const
{
	if (!options.arithmetic_relaxed_precision)
		return false;

	if (type->getTypeID() == llvm::Type::TypeID::ArrayTyID)
		type = llvm::cast<llvm::ArrayType>(type)->getArrayElementType();
	if (type->getTypeID() == llvm::Type::TypeID::VectorTyID)
		type = llvm::cast<llvm::VectorType>(type)->getElementType();

	return (!execution_mode_meta.native_16bit_operations && !options.min_precision_prefer_native_16bit) &&
	       (type->getTypeID() == llvm::Type::TypeID::HalfTyID ||
	        (type->getTypeID() == llvm::Type::TypeID::IntegerTyID && type->getIntegerBitWidth() == 16 &&
	         known_integer_sign));
}

void Converter::Impl::decorate_relaxed_precision(const llvm::Type *type, spv::Id id, bool known_integer_sign)
{
	// Ignore RelaxedPrecision for integers since they are untyped in LLVM for the most part.
	// For texture loading operations and similar, we load in the appropriate sign, so it's safe to use RelaxedPrecision,
	// since RelaxedPrecision may sign-extend based on the OpTypeInt's signage.
	// DXIL is kinda broken in this regard since min16int and min16uint lower to the same i16 type ... :(
	if (type_can_relax_precision(type, known_integer_sign))
		builder().addDecoration(id, spv::DecorationRelaxedPrecision);
}

void Converter::Impl::set_option(const OptionBase &cap)
{
	switch (cap.type)
	{
	case Option::ShaderDemoteToHelper:
		options.shader_demote = static_cast<const OptionShaderDemoteToHelper &>(cap).supported;
		break;

	case Option::DualSourceBlending:
		options.dual_source_blending = static_cast<const OptionDualSourceBlending &>(cap).enabled;
		break;

	case Option::OutputSwizzle:
	{
		auto &swiz = static_cast<const OptionOutputSwizzle &>(cap);
		options.output_swizzles.clear();
		options.output_swizzles.insert(options.output_swizzles.end(), swiz.swizzles, swiz.swizzles + swiz.swizzle_count);
		break;
	}

	case Option::RasterizerSampleCount:
	{
		auto &count = static_cast<const OptionRasterizerSampleCount &>(cap);
		options.rasterizer_sample_count = count.count;
		options.rasterizer_sample_count_spec_constant = count.spec_constant;
		break;
	}

	case Option::RootConstantInlineUniformBlock:
	{
		auto &ubo = static_cast<const OptionRootConstantInlineUniformBlock &>(cap);
		options.inline_ubo_descriptor_set = ubo.desc_set;
		options.inline_ubo_descriptor_binding = ubo.binding;
		options.inline_ubo_enable = ubo.enable;
		break;
	}

	case Option::BindlessCBVSSBOEmulation:
	{
		auto &bindless = static_cast<const OptionBindlessCBVSSBOEmulation &>(cap);
		options.bindless_cbv_ssbo_emulation = bindless.enable;
		break;
	}

	case Option::PhysicalStorageBuffer:
	{
		auto &psb = static_cast<const OptionPhysicalStorageBuffer &>(cap);
		options.physical_storage_buffer = psb.enable;
		break;
	}

	case Option::SBTDescriptorSizeLog2:
	{
		auto &sbt = static_cast<const OptionSBTDescriptorSizeLog2 &>(cap);
		options.sbt_descriptor_size_srv_uav_cbv_log2 = sbt.size_log2_srv_uav_cbv;
		options.sbt_descriptor_size_sampler_log2 = sbt.size_log2_sampler;
		break;
	}

	case Option::SSBOAlignment:
	{
		auto &align = static_cast<const OptionSSBOAlignment &>(cap);
		options.ssbo_alignment = align.alignment;
		break;
	}

	case Option::TypedUAVReadWithoutFormat:
	{
		auto &uav = static_cast<const OptionTypedUAVReadWithoutFormat &>(cap);
		options.typed_uav_read_without_format = uav.supported;
		break;
	}

	case Option::ShaderSourceFile:
	{
		auto &file = static_cast<const OptionShaderSourceFile &>(cap);
		if (!file.name.empty())
			options.shader_source_file = file.name;
		else
			options.shader_source_file.clear();
		break;
	}

	case Option::BindlessTypedBufferOffsets:
	{
		auto &off = static_cast<const OptionBindlessTypedBufferOffsets &>(cap);
		options.bindless_typed_buffer_offsets = off.enable;
		break;
	}

	case Option::BindlessOffsetBufferLayout:
	{
		auto &off = static_cast<const OptionBindlessOffsetBufferLayout &>(cap);
		options.offset_buffer_layout = { off.untyped_offset, off.typed_offset, off.stride };
		break;
	}

	case Option::StorageInputOutput16:
	{
		auto &storage = static_cast<const OptionStorageInputOutput16 &>(cap);
		options.storage_16bit_input_output = storage.supported;
		break;
	}

	case Option::DescriptorQA:
	{
		auto &qa = static_cast<const OptionDescriptorQA &>(cap);
		options.descriptor_qa_enabled = qa.enabled;
		options.descriptor_qa.version = qa.version;
		options.descriptor_qa.shader_hash = qa.shader_hash;
		options.descriptor_qa.global_desc_set = qa.global_desc_set;
		options.descriptor_qa.global_binding = qa.global_binding;
		options.descriptor_qa.heap_desc_set = qa.heap_desc_set;
		options.descriptor_qa.heap_binding = qa.heap_binding;
		break;
	}

	case Option::MinPrecisionNative16Bit:
	{
		auto &minprec = static_cast<const OptionMinPrecisionNative16Bit &>(cap);
		options.min_precision_prefer_native_16bit = minprec.enabled;
		break;
	}

	case Option::ShaderI8Dot:
		options.shader_i8_dot_enabled = static_cast<const OptionShaderI8Dot &>(cap).supported;
		break;

	case Option::ShaderRayTracingPrimitiveCulling:
		options.ray_tracing_primitive_culling_enabled =
		    static_cast<const OptionShaderRayTracingPrimitiveCulling &>(cap).supported;
		break;

	case Option::InvariantPosition:
		options.invariant_position = static_cast<const OptionInvariantPosition &>(cap).enabled;
		break;

	case Option::ScalarBlockLayout:
		options.scalar_block_layout = static_cast<const OptionScalarBlockLayout &>(cap).supported;
		options.supports_per_component_robustness =
		    static_cast<const OptionScalarBlockLayout &>(cap).supports_per_component_robustness;
		break;

	case Option::BarycentricKHR:
		options.khr_barycentrics_enabled = static_cast<const OptionBarycentricKHR &>(cap).supported;
		break;

	case Option::RobustPhysicalCBVLoad:
		// Obsolete option, use normal quirks instead.
		options.quirks.robust_physical_cbv = static_cast<const OptionRobustPhysicalCBVLoad &>(cap).enabled;
		break;

	case Option::ArithmeticRelaxedPrecision:
		options.arithmetic_relaxed_precision = static_cast<const OptionArithmeticRelaxedPrecision &>(cap).enabled;
		break;

	case Option::PhysicalAddressDescriptorIndexing:
		options.physical_address_descriptor_stride =
		    static_cast<const OptionPhysicalAddressDescriptorIndexing &>(cap).element_stride;
		options.physical_address_descriptor_offset =
		    static_cast<const OptionPhysicalAddressDescriptorIndexing &>(cap).element_offset;
		break;

	case Option::ForceSubgroupSize:
		options.force_subgroup_size = static_cast<const OptionForceSubgroupSize &>(cap).forced_value;
		options.force_wave_size_enable = static_cast<const OptionForceSubgroupSize &>(cap).wave_size_enable;
		break;

	case Option::DenormPreserveSupport:
		options.supports_float16_denorm_preserve =
		    static_cast<const OptionDenormPreserveSupport &>(cap).support_float16_denorm_preserve;
		options.supports_float64_denorm_preserve =
		    static_cast<const OptionDenormPreserveSupport &>(cap).support_float64_denorm_preserve;
		break;

	case Option::StrictHelperLaneWaveOps:
		options.strict_helper_lane_waveops = static_cast<const OptionStrictHelperLaneWaveOps &>(cap).enable;
		break;

	case Option::SubgroupPartitionedNV:
		options.nv_subgroup_partition_enabled = static_cast<const OptionSubgroupPartitionedNV &>(cap).supported;
		break;

	case Option::DeadCodeEliminate:
		options.eliminate_dead_code = static_cast<const OptionDeadCodeEliminate &>(cap).enabled;
		break;

	case Option::PreciseControl:
		options.propagate_precise = static_cast<const OptionPreciseControl &>(cap).propagate_precise;
		options.force_precise = static_cast<const OptionPreciseControl &>(cap).force_precise;
		break;

	case Option::SampleGradOptimizationControl:
		options.grad_opt.enabled = static_cast<const OptionSampleGradOptimizationControl &>(cap).enabled;
		options.grad_opt.assume_uniform_scale =
		    static_cast<const OptionSampleGradOptimizationControl &>(cap).assume_uniform_scale;
		break;

	case Option::OpacityMicromap:
		options.opacity_micromap_enabled = static_cast<const OptionOpacityMicromap &>(cap).enabled;
		break;

	case Option::BranchControl:
	{
		auto &c = static_cast<const OptionBranchControl &>(cap);
		options.branch_control.use_shader_metadata = c.use_shader_metadata;
		options.branch_control.force_branch = c.force_branch;
		options.branch_control.force_unroll = c.force_unroll;
		options.branch_control.force_loop = c.force_loop;
		options.branch_control.force_flatten = c.force_flatten;
		break;
	}

	case Option::SubgroupProperties:
	{
		auto &c = static_cast<const OptionSubgroupProperties &>(cap);
		options.subgroup_size.implementation_minimum = c.minimum_size;
		options.subgroup_size.implementation_maximum = c.maximum_size;
		break;
	}

	case Option::DescriptorHeapRobustness:
	{
		auto &c = static_cast<const OptionDescriptorHeapRobustness &>(cap);
		options.descriptor_heap_robustness = c.enabled;
		break;
	}

	case Option::ComputeShaderDerivativesNV:
	{
		auto &c = static_cast<const OptionComputeShaderDerivativesNV &>(cap);
		options.compute_shader_derivatives = c.supported;
		break;
	}

	case Option::QuadControlReconvergence:
	{
		auto &c = static_cast<const OptionQuadControlReconvergence &>(cap);
		options.supports_quad_control = c.supports_quad_control;
		options.supports_maximal_reconvergence = c.supports_maximal_reconvergence;
		options.force_maximal_reconvergence = c.force_maximal_reconvergence;
		break;
	}

	case Option::RawAccessChainsNV:
	{
		auto &c = static_cast<const OptionRawAccessChainsNV &>(cap);
		options.nv_raw_access_chains = c.supported;
		break;
	}

	case Option::DriverVersion:
	{
		auto &c = static_cast<const OptionDriverVersion &>(cap);
		options.driver_id = c.driver_id;
		options.driver_version = c.driver_version;
		break;
	}

	case Option::ComputeShaderDerivatives:
	{
		auto &c = static_cast<const OptionComputeShaderDerivatives &>(cap);
		options.compute_shader_derivatives = c.supports_nv || c.supports_khr;
		options.compute_shader_derivatives_khr = c.supports_khr;
		break;
	}

	case Option::InstructionInstrumentation:
	{
		auto &qa = static_cast<const OptionInstructionInstrumentation &>(cap);
		options.instruction_instrumentation.enabled = qa.enabled;
		options.instruction_instrumentation.version = qa.version;
		options.instruction_instrumentation.shader_hash = qa.shader_hash;
		options.instruction_instrumentation.fp16 = false;
		options.instruction_instrumentation.fp32 = true;
		options.instruction_instrumentation.fp64 = true;
		options.instruction_instrumentation.type = qa.type;
		options.instruction_instrumentation.control_desc_set = qa.control_desc_set;
		options.instruction_instrumentation.control_binding = qa.control_binding;
		options.instruction_instrumentation.payload_desc_set = qa.payload_desc_set;
		options.instruction_instrumentation.payload_binding = qa.payload_binding;
		break;
	}

	case Option::ShaderQuirk:
	{
		auto &quirk = static_cast<const OptionShaderQuirk &>(cap);
		switch (quirk.quirk)
		{
		case ShaderQuirk::ForceDeviceMemoryBarriersThreadGroupCoherence:
			// Dragon Age: Veilguard workaround.
			options.quirks.force_device_memory_barriers_thread_group_coherence = true;
			break;

		case ShaderQuirk::AssumeBrokenSub8x8CubeMips:
			// The First Descendant workaround. Importance sampling pass is broken since only mips down to 8x8
			// are populated with valid data.
			options.quirks.assume_broken_sub_8x8_cube_mips = true;
			break;

		case ShaderQuirk::RobustPhysicalCBVForwarding:
			// Gray Zone Warfare workaround. Does CBV forwarding with out of bounds access on the local array <_<.
			// Can trip page faults.
			options.quirks.robust_physical_cbv_forwarding = true;
			break;

		case ShaderQuirk::MeshOutputRobustness:
			options.quirks.mesh_outputs_bounds_check = true;
			break;

		case ShaderQuirk::AggressiveNonUniform:
			// Starfield workaround. Some shaders should have used nonuniform,
			// but the general pattern to detect it is quite complicated.
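			// Hypothetical shape of the pattern (not taken from this module):
			//   uint idx = ComputePerLaneIndex();
			//   Texture2D t = ResourceDescriptorHeap[idx]; // missing NonUniformResourceIndex(idx)
			// The quirk decorates such descriptor accesses as NonUniform wholesale instead of
			// trying to prove divergence case by case.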
			options.quirks.aggressive_nonuniform = true;
			break;

		case ShaderQuirk::RobustPhysicalCBV:
			options.quirks.robust_physical_cbv = true;
			break;

		case ShaderQuirk::PromoteGroupToDeviceMemoryBarrier:
			options.quirks.promote_group_to_device_memory_barrier = true;
			break;

		case ShaderQuirk::GroupSharedAutoBarrier:
			options.quirks.group_shared_auto_barrier = true;
			break;

		case ShaderQuirk::FixupLoopHeaderUndefPhis:
			options.quirks.fixup_loop_header_undef_phis = true;
			break;

		case ShaderQuirk::FixupRsqrtInfNan:
			options.quirks.fixup_rsqrt = true;
			break;

		case ShaderQuirk::IgnorePrimitiveShadingRate:
			options.quirks.ignore_primitive_shading_rate = true;
			break;

		case ShaderQuirk::RobustComputeQuadBroadcast:
			options.quirks.robust_compute_quad_broadcast = true;
			break;

		case ShaderQuirk::PreciseFMA:
			options.quirks.precise_fma = true;
			break;

		default:
			break;
		}
		break;
	}

	case Option::ExtendedRobustness:
	{
		auto &robust = static_cast<const OptionExtendedRobustness &>(cap);
		options.extended_robustness.alloca = robust.robust_alloca;
		options.extended_robustness.constant_lut = robust.robust_constant_lut;
		options.extended_robustness.group_shared = robust.robust_group_shared;
		break;
	}

	case Option::MaxTessFactor:
	{
		auto &tess_factor = static_cast<const OptionMaxTessFactor &>(cap);
		options.max_tess_factor = tess_factor.max_tess_factor;
		break;
	}

	case Option::VulkanMemoryModel:
	{
		auto &vmm = static_cast<const OptionVulkanMemoryModel &>(cap);
		execution_mode_meta.memory_model = vmm.enabled ? spv::MemoryModelVulkan : spv::MemoryModelGLSL450;
		break;
	}

	case Option::Float8Support:
	{
		auto &float8 = static_cast<const OptionFloat8Support &>(cap);
		options.wmma_fp8 = float8.wmma_fp8;
		options.nv_cooperative_matrix2_conversions = float8.nv_cooperative_matrix2_conversions;
		break;
	}

	case Option::NvAPI:
	{
		auto &nv = static_cast<const OptionNvAPI &>(cap);
		options.nvapi.enabled = nv.enabled;
		options.nvapi.register_index = nv.register_index;
		options.nvapi.register_space = nv.register_space;
		break;
	}

	case Option::ExtendedNonSemantic:
	{
		auto &sem = static_cast<const OptionExtendedNonSemantic &>(cap);
		options.extended_non_semantic_info = sem.enabled;
		break;
	}

	case Option::ViewInstancing:
	{
		auto &inst = static_cast<const OptionViewInstancing &>(cap);
		options.multiview.enable = inst.enabled;
		options.multiview.last_pre_rasterization_stage = inst.last_pre_rasterization_stage;
		options.multiview.view_index_to_view_instance_spec_id = inst.view_index_to_view_instance_spec_id;
		options.multiview.view_instance_to_viewport_spec_id = inst.view_instance_to_viewport_spec_id;
		break;
	}

	case Option::MixedDotProduct:
	{
		auto &dot = static_cast<const OptionMixedDotProduct &>(cap);
		options.mixed_dot_product_fp16_fp16_fp32 = dot.fp16_fp16_fp32;
		break;
	}

	case Option::ComputeShaderDerivativesQuad:
	{
		auto &c = static_cast<const OptionComputeShaderDerivativesQuad &>(cap);
		options.compute_shader_derivatives_quad = c.supports_quad;
		break;
	}

	default:
		break;
	}
}

void Converter::Impl::suggest_maximum_wave_size(unsigned wave_size)
{
	if ((execution_mode_meta.heuristic_max_wave_size == 0 || execution_mode_meta.heuristic_max_wave_size > wave_size) &&
	    options.force_subgroup_size == 0)
	{
		execution_mode_meta.heuristic_max_wave_size = wave_size;
	}
}

void Converter::Impl::suggest_minimum_wave_size(unsigned wave_size)
{
	if ((execution_mode_meta.heuristic_min_wave_size == 0 || execution_mode_meta.heuristic_min_wave_size < wave_size) &&
	    options.force_subgroup_size == 0)
	{
		execution_mode_meta.heuristic_min_wave_size = wave_size;
	}
}

void Converter::set_resource_remapping_interface(ResourceRemappingInterface *iface)
{
	impl->resource_mapping_iface = iface;
}

void Converter::set_meta_descriptor(MetaDescriptor desc, MetaDescriptorKind kind, uint32_t desc_set, uint32_t binding)
{
	if (int(desc) >= int(MetaDescriptor::Count))
		return;
	impl->options.meta_descriptor_mappings[int(desc)] = { kind, desc_set, binding };
}

ShaderStage Converter::get_shader_stage(const LLVMBCParser &bitcode_parser, const char *entry)
{
	auto &module = bitcode_parser.get_module();
	return Impl::get_remapping_stage(get_execution_model(module, get_entry_point_meta(module, entry)));
}

void Converter::scan_resources(ResourceRemappingInterface *iface, const LLVMBCParser &bitcode_parser)
{
	Impl::scan_resources(iface, bitcode_parser);
}

void Converter::add_option(const OptionBase &cap)
{
	impl->set_option(cap);
}

bool Converter::recognizes_option(Option cap)
{
	return unsigned(cap) < unsigned(Option::Count);
}

void Converter::set_entry_point(const char *entry)
{
	impl->options.entry_point = entry;
}

void Converter::add_root_parameter_mapping(uint32_t root_parameter_index, uint32_t offset)
{
	impl->root_parameter_mappings.push_back({ root_parameter_index, offset });
}

uint32_t Converter::pack_desc_set_binding_to_virtual_offset(uint32_t desc_set, uint32_t binding)
{
	return 0x80000000u | (desc_set << 24) | binding;
}

void Converter::add_non_semantic_debug_info(const NonSemanticDebugInfo &info)
{
	impl->non_semantic_debug_info.push_back(info);
}

const String &Converter::get_compiled_entry_point() const
{
	return impl->execution_mode_meta.entry_point_name;
}

const GlobalConfiguration &GlobalConfiguration::get()
{
	static GlobalConfiguration config;
	return config;
}

GlobalConfiguration::GlobalConfiguration()
{
	const char *env = getenv("DXIL_SPIRV_CONFIG");
	if (env)
	{
		if (strcmp(env, "wmma_rdna3_workaround") == 0)
			wmma_rdna3_workaround = true;
		else if (strcmp(env, "wmma_conv_hack") == 0)
			wmma_conv_hack = true;
	}
}
} // namespace dxil_spv



================================================
FILE: dxil_converter.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include "cfg_structurizer.hpp"
#include "dxil_parser.hpp"
#include "llvm_bitcode_parser.hpp"
#include "node_pool.hpp"
#include "spirv_module.hpp"
// The angle-bracket include was lost in extraction; <memory> is reconstructed from the
// std::unique_ptr usage below.
#include <memory>

namespace spv
{
class Function;
}

namespace dxil_spv
{
struct ConvertedFunction
{
	struct Function
	{
		CFGNode *entry;
		spv::Function *func;
		bool is_structured;
	};
	Function entry = {};
	Vector<Function> leaf_functions;
	std::unique_ptr<CFGNodePool> node_pool;
};

enum class ShaderStage : unsigned
{
	Unknown = 0,
	Vertex = 1,
	Hull = 2,
	Domain = 3,
	Geometry = 4,
	Pixel = 5,
	Compute = 6,
	Intersection = 7,
	ClosestHit = 8,
	Miss = 9,
	AnyHit = 10,
	RayGeneration = 11,
	Callable = 12,
	Amplification = 13,
	Mesh = 14,
};

struct D3DBinding
{
	ShaderStage stage;
	DXIL::ResourceKind kind;

	// The index in which the resource was declared in the module.
	// Range is [0, N), where N is number of resources.
	unsigned resource_index;

	// : register(N, spaceM)
	// If register(UINT32_MAX, UINT32_MAX) with range_size = UINT32_MAX is used, this is treated
	// as a binding of the global descriptor heap.
	unsigned register_space;
	unsigned register_index;

	// -1 -> unsized, 1 means non-arrayed resource.
	unsigned range_size;

	// For raw buffers, this is equal to 16, for structured buffers this is equal to the stride of the elements.
	// Otherwise, 0.
	unsigned alignment;
};

enum class VulkanDescriptorType : unsigned
{
	Identity = 0,
	SSBO = 1,
	TexelBuffer = 2,
	BufferDeviceAddress = 3,
	UBO = 4,
	InputAttachment = 5
};

struct VulkanBinding
{
	unsigned descriptor_set;
	unsigned binding;

	// For bindless, refers to the Nth root constant.
	// For buffer device address, refers to the Nth root descriptor.
	union
	{
		unsigned root_constant_index;
		unsigned input_attachment_index;
	};

	struct
	{
		unsigned heap_root_offset;
		// If true, the resource is accessed directly from a descriptor heap in a way which emulates D3D12 closely.
		// layout(set = descriptor_set, binding = binding) uniform Type HEAP[];
		// HEAP[shader_index + heap_offset + registers.u32s[push_constant_member]].
		bool use_heap;
	} bindless;

	VulkanDescriptorType descriptor_type;
};

struct D3DUAVBinding
{
	D3DBinding binding;
	bool counter;
};

struct VulkanSRVBinding
{
	VulkanBinding buffer_binding;
	VulkanBinding offset_binding;
};

struct VulkanUAVBinding
{
	VulkanBinding buffer_binding;
	VulkanBinding counter_binding;
	VulkanBinding offset_binding;
};

struct VulkanPushConstantBinding
{
	unsigned offset_in_words;
};

struct VulkanCBVBinding
{
	union
	{
		VulkanBinding buffer;
		VulkanPushConstantBinding push;
	};

	// Select if the CBV should fetch constants from push constants, or regular UBO.
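	// Hedged remapper-side sketch (names hypothetical): route root-constant CBVs to push
	// constants, everything else to a plain UBO binding.
	//   VulkanCBVBinding vk = {};
	//   if (is_root_constant_cbv) { vk.push_constant = true; vk.push.offset_in_words = root_offset; }
	//   else { vk.push_constant = false; vk.buffer.descriptor_set = set; vk.buffer.binding = binding;
	//          vk.buffer.descriptor_type = VulkanDescriptorType::UBO; }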
bool push_constant; }; struct D3DStageIO { const char *semantic; unsigned semantic_index; unsigned start_row; unsigned rows; }; enum VulkanStageIoFlagBits { STAGE_IO_NONE = 0u, STAGE_IO_PER_PRIMITIVE = 0x1u, }; using VulkanStageIoFlags = unsigned; struct VulkanStageIO { unsigned location; unsigned component; VulkanStageIoFlags flags; }; struct D3DStreamOutput { const char *semantic; unsigned semantic_index; }; struct VulkanStreamOutput { unsigned offset; unsigned stride; unsigned buffer_index; bool enable; }; class ResourceRemappingInterface { public: virtual ~ResourceRemappingInterface() = default; virtual bool remap_srv(const D3DBinding &d3d_binding, VulkanSRVBinding &vulkan_binding) = 0; virtual bool remap_sampler(const D3DBinding &d3d_binding, VulkanBinding &vulkan_binding) = 0; virtual bool remap_uav(const D3DUAVBinding &d3d_binding, VulkanUAVBinding &vulkan_binding) = 0; virtual bool remap_cbv(const D3DBinding &d3d_binding, VulkanCBVBinding &vulkan_binding) = 0; virtual bool remap_vertex_input(const D3DStageIO &d3d_input, VulkanStageIO &vulkan_location) = 0; virtual bool remap_stream_output(const D3DStreamOutput &d3d_output, VulkanStreamOutput &vulkan_output) = 0; virtual bool remap_stage_input(const D3DStageIO &d3d_input, VulkanStageIO &vk_input) = 0; virtual bool remap_stage_output(const D3DStageIO &d3d_output, VulkanStageIO &vk_output) = 0; virtual unsigned get_root_constant_word_count() = 0; virtual unsigned get_root_descriptor_count() = 0; virtual bool has_nontrivial_stage_input_remapping() = 0; }; enum class Option : uint32_t { Invalid = 0, ShaderDemoteToHelper = 1, DualSourceBlending = 2, OutputSwizzle = 3, RasterizerSampleCount = 4, RootConstantInlineUniformBlock = 5, BindlessCBVSSBOEmulation = 6, PhysicalStorageBuffer = 7, SBTDescriptorSizeLog2 = 8, SSBOAlignment = 9, TypedUAVReadWithoutFormat = 10, ShaderSourceFile = 11, BindlessTypedBufferOffsets = 12, BindlessOffsetBufferLayout = 13, StorageInputOutput16 = 14, DescriptorQA = 15, MinPrecisionNative16Bit = 16, ShaderI8Dot = 17, ShaderRayTracingPrimitiveCulling = 18, InvariantPosition = 19, ScalarBlockLayout = 20, BarycentricKHR = 21, RobustPhysicalCBVLoad = 22, ArithmeticRelaxedPrecision = 23, PhysicalAddressDescriptorIndexing = 24, ForceSubgroupSize = 25, DenormPreserveSupport = 26, StrictHelperLaneWaveOps = 27, SubgroupPartitionedNV = 28, DeadCodeEliminate = 29, PreciseControl = 30, SampleGradOptimizationControl = 31, OpacityMicromap = 32, BranchControl = 33, SubgroupProperties = 34, DescriptorHeapRobustness = 35, ComputeShaderDerivativesNV = 36, QuadControlReconvergence = 37, RawAccessChainsNV = 38, DriverVersion = 39, ComputeShaderDerivatives = 40, InstructionInstrumentation = 41, ShaderQuirk = 42, ExtendedRobustness = 43, MaxTessFactor = 44, VulkanMemoryModel = 45, Float8Support = 46, NvAPI = 47, ExtendedNonSemantic = 48, ViewInstancing = 49, MixedDotProduct = 50, ComputeShaderDerivativesQuad = 51, Count }; enum class ResourceClass : uint32_t { SRV = 0, UAV = 1, CBV = 2, Sampler = 3 }; struct OptionBase { explicit OptionBase(Option cap) : type(cap) { } Option type; DXIL_SPV_OVERRIDE_NEW_DELETE }; struct OptionShaderDemoteToHelper : OptionBase { OptionShaderDemoteToHelper() : OptionBase(Option::ShaderDemoteToHelper) { } bool supported = false; }; struct OptionDualSourceBlending : OptionBase { OptionDualSourceBlending() : OptionBase(Option::DualSourceBlending) { } bool enabled = false; }; struct OptionOutputSwizzle : OptionBase { OptionOutputSwizzle() : OptionBase(Option::OutputSwizzle) { } const unsigned *swizzles = 
nullptr; unsigned swizzle_count = 0; }; struct OptionRasterizerSampleCount : OptionBase { OptionRasterizerSampleCount() : OptionBase(Option::RasterizerSampleCount) { } unsigned count = 0; bool spec_constant = false; }; struct OptionRootConstantInlineUniformBlock : OptionBase { OptionRootConstantInlineUniformBlock() : OptionBase(Option::RootConstantInlineUniformBlock) { } unsigned desc_set = 0; unsigned binding = 0; bool enable = false; }; struct OptionBindlessCBVSSBOEmulation : OptionBase { OptionBindlessCBVSSBOEmulation() : OptionBase(Option::BindlessCBVSSBOEmulation) { } bool enable = false; }; struct OptionPhysicalStorageBuffer : OptionBase { OptionPhysicalStorageBuffer() : OptionBase(Option::PhysicalStorageBuffer) { } bool enable = false; }; struct OptionSBTDescriptorSizeLog2 : OptionBase { OptionSBTDescriptorSizeLog2() : OptionBase(Option::SBTDescriptorSizeLog2) { } unsigned size_log2_srv_uav_cbv = 0; unsigned size_log2_sampler = 0; }; struct OptionSSBOAlignment : OptionBase { OptionSSBOAlignment() : OptionBase(Option::SSBOAlignment) { } unsigned alignment = 1; }; struct OptionTypedUAVReadWithoutFormat : OptionBase { OptionTypedUAVReadWithoutFormat() : OptionBase(Option::TypedUAVReadWithoutFormat) { } bool supported = false; }; struct OptionShaderSourceFile : OptionBase { OptionShaderSourceFile() : OptionBase(Option::ShaderSourceFile) { } String name; }; struct OptionBindlessTypedBufferOffsets : OptionBase { OptionBindlessTypedBufferOffsets() : OptionBase(Option::BindlessTypedBufferOffsets) { } bool enable = false; }; struct OptionBindlessOffsetBufferLayout : OptionBase { OptionBindlessOffsetBufferLayout() : OptionBase(Option::BindlessOffsetBufferLayout) { } unsigned untyped_offset = 0; unsigned typed_offset = 0; unsigned stride = 1; }; struct OptionStorageInputOutput16 : OptionBase { OptionStorageInputOutput16() : OptionBase(Option::StorageInputOutput16) { } bool supported = true; }; struct OptionDescriptorQA : OptionBase { OptionDescriptorQA() : OptionBase(Option::DescriptorQA) { } enum { DefaultVersion = 1 }; bool enabled = false; uint32_t version = DefaultVersion; uint32_t global_desc_set = 0; uint32_t global_binding = 0; uint32_t heap_desc_set = 0; uint32_t heap_binding = 0; uint64_t shader_hash = 0; }; struct OptionMinPrecisionNative16Bit : OptionBase { OptionMinPrecisionNative16Bit() : OptionBase(Option::MinPrecisionNative16Bit) { } bool enabled = false; }; struct OptionShaderI8Dot : OptionBase { OptionShaderI8Dot() : OptionBase(Option::ShaderI8Dot) { } bool supported = false; }; struct OptionShaderRayTracingPrimitiveCulling : OptionBase { OptionShaderRayTracingPrimitiveCulling() : OptionBase(Option::ShaderRayTracingPrimitiveCulling) { } bool supported = false; }; struct OptionInvariantPosition : OptionBase { OptionInvariantPosition() : OptionBase(Option::InvariantPosition) { } bool enabled = false; }; struct OptionScalarBlockLayout : OptionBase { OptionScalarBlockLayout() : OptionBase(Option::ScalarBlockLayout) { } bool supported = false; bool supports_per_component_robustness = false; }; struct OptionBarycentricKHR : OptionBase { OptionBarycentricKHR() : OptionBase(Option::BarycentricKHR) { } bool supported = false; }; struct OptionRobustPhysicalCBVLoad : OptionBase { OptionRobustPhysicalCBVLoad() : OptionBase(Option::RobustPhysicalCBVLoad) { } bool enabled = false; }; struct OptionArithmeticRelaxedPrecision : OptionBase { OptionArithmeticRelaxedPrecision() : OptionBase(Option::ArithmeticRelaxedPrecision) { } bool enabled = false; }; struct 
OptionPhysicalAddressDescriptorIndexing : OptionBase
{
	OptionPhysicalAddressDescriptorIndexing() : OptionBase(Option::PhysicalAddressDescriptorIndexing) { }
	// In units of uint64_t addresses.
	// Used for scenarios where a descriptor buffer is bound directly as an SSBO
	// and raw VAs might no longer be tightly packed in memory.
	unsigned element_stride = 1;
	unsigned element_offset = 0;
};

struct OptionForceSubgroupSize : OptionBase
{
	OptionForceSubgroupSize() : OptionBase(Option::ForceSubgroupSize) { }
	// If not 0, forces WaveGetLaneCount() to return a fixed value.
	// Can be used to force a shader to avoid buggy code paths.
	unsigned forced_value = 0;
	// If true and forced_value is not 0,
	// pretends that the shader exposes SM 6.6 WaveSize equal to forced_value.
	// Intended use case:
	// - false: Workaround and avoid bad subgroup code paths by setting forced_value to something meaningless.
	// - true: Performance tweaks. Force e.g. wave32 vs wave64 on RDNA.
	bool wave_size_enable = false;
};

struct OptionDenormPreserveSupport : OptionBase
{
	OptionDenormPreserveSupport() : OptionBase(Option::DenormPreserveSupport) { }
	// Should always be set to true if supported.
	// If not supported, rely on implementation to default to the right thing.
	bool support_float16_denorm_preserve = false;
	bool support_float64_denorm_preserve = false;
};

struct OptionStrictHelperLaneWaveOps : OptionBase
{
	OptionStrictHelperLaneWaveOps() : OptionBase(Option::StrictHelperLaneWaveOps) { }
	// If true, and WaveOpsIncludeHelperLanes is not set,
	// helper lanes explicitly do not participate in wave ops.
	bool enable = true;
};

struct OptionSubgroupPartitionedNV : OptionBase
{
	OptionSubgroupPartitionedNV() : OptionBase(Option::SubgroupPartitionedNV) { }
	bool supported = false;
};

struct OptionDeadCodeEliminate : OptionBase
{
	OptionDeadCodeEliminate() : OptionBase(Option::DeadCodeEliminate) { }
	bool enabled = false;
};

struct OptionPreciseControl : OptionBase
{
	OptionPreciseControl() : OptionBase(Option::PreciseControl) { }
	bool force_precise = false;
	bool propagate_precise = false;
};

struct OptionSampleGradOptimizationControl : OptionBase
{
	OptionSampleGradOptimizationControl() : OptionBase(Option::SampleGradOptimizationControl) { }
	bool enabled = false;
	bool assume_uniform_scale = false;
};

struct OptionOpacityMicromap : OptionBase
{
	OptionOpacityMicromap() : OptionBase(Option::OpacityMicromap) { }
	bool enabled = false;
};

struct OptionBranchControl : OptionBase
{
	OptionBranchControl() : OptionBase(Option::BranchControl) { }
	bool use_shader_metadata = false;
	bool force_unroll = false;
	bool force_loop = false;
	bool force_flatten = false;
	bool force_branch = false;
};

struct OptionSubgroupProperties : OptionBase
{
	OptionSubgroupProperties() : OptionBase(Option::SubgroupProperties) { }
	unsigned minimum_size = 4;
	unsigned maximum_size = 128;
};

struct OptionDescriptorHeapRobustness : OptionBase
{
	OptionDescriptorHeapRobustness() : OptionBase(Option::DescriptorHeapRobustness) { }
	bool enabled = false;
};

struct OptionComputeShaderDerivativesNV : OptionBase
{
	OptionComputeShaderDerivativesNV() : OptionBase(Option::ComputeShaderDerivativesNV) { }
	// Before this option existed, support was assumed by default.
bool supported = true; }; struct OptionQuadControlReconvergence : OptionBase { OptionQuadControlReconvergence() : OptionBase(Option::QuadControlReconvergence) { } bool supports_quad_control = false; bool supports_maximal_reconvergence = false; bool force_maximal_reconvergence = false; }; struct OptionRawAccessChainsNV : OptionBase { OptionRawAccessChainsNV() : OptionBase(Option::RawAccessChainsNV) { } bool supported = false; }; struct OptionDriverVersion : OptionBase { OptionDriverVersion() : OptionBase(Option::DriverVersion) { } uint32_t driver_id = 0; // Vulkan12Properties::driverID uint32_t driver_version = 0; // PhysicalDeviceProperties::driverVersion }; struct OptionComputeShaderDerivatives : OptionBase { OptionComputeShaderDerivatives() : OptionBase(Option::ComputeShaderDerivatives) { } bool supports_nv = false; bool supports_khr = false; }; struct OptionInstructionInstrumentation : OptionBase { OptionInstructionInstrumentation() : OptionBase(Option::InstructionInstrumentation) { } enum { DefaultVersion = 1 }; bool enabled = false; uint32_t version = DefaultVersion; uint32_t control_desc_set = 0; uint32_t control_binding = 0; uint32_t payload_desc_set = 0; uint32_t payload_binding = 0; uint64_t shader_hash = 0; InstructionInstrumentationType type = {}; }; enum class ShaderQuirk : uint32_t { None = 0, ForceDeviceMemoryBarriersThreadGroupCoherence, AssumeBrokenSub8x8CubeMips, RobustPhysicalCBVForwarding, MeshOutputRobustness, AggressiveNonUniform, RobustPhysicalCBV, PromoteGroupToDeviceMemoryBarrier, GroupSharedAutoBarrier, FixupLoopHeaderUndefPhis, FixupRsqrtInfNan, IgnorePrimitiveShadingRate, RobustComputeQuadBroadcast, PreciseFMA }; struct OptionShaderQuirk : OptionBase { OptionShaderQuirk() : OptionBase(Option::ShaderQuirk) { } ShaderQuirk quirk = ShaderQuirk::None; }; struct OptionExtendedRobustness : OptionBase { OptionExtendedRobustness() : OptionBase(Option::ExtendedRobustness) { } bool robust_group_shared = false; bool robust_alloca = false; bool robust_constant_lut = false; }; struct OptionMaxTessFactor : OptionBase { OptionMaxTessFactor() : OptionBase(Option::MaxTessFactor) { } unsigned max_tess_factor = 0; }; struct OptionVulkanMemoryModel : OptionBase { OptionVulkanMemoryModel() : OptionBase(Option::VulkanMemoryModel) { } bool enabled = false; }; struct OptionFloat8Support : OptionBase { OptionFloat8Support() : OptionBase(Option::Float8Support) { } bool wmma_fp8 = false; bool nv_cooperative_matrix2_conversions = false; }; struct OptionNvAPI : OptionBase { OptionNvAPI() : OptionBase(Option::NvAPI) { } bool enabled = false; unsigned register_index = 0; unsigned register_space = 0; }; struct OptionExtendedNonSemantic : OptionBase { OptionExtendedNonSemantic() : OptionBase(Option::ExtendedNonSemantic) { } bool enabled = false; }; struct OptionViewInstancing : OptionBase { OptionViewInstancing() : OptionBase(Option::ViewInstancing) { } bool enabled = false; bool implicit_viewport_offset = false; bool last_pre_rasterization_stage = false; uint32_t view_index_to_view_instance_spec_id = UINT32_MAX; uint32_t view_instance_to_viewport_spec_id = UINT32_MAX; }; struct OptionMixedDotProduct : OptionBase { OptionMixedDotProduct() : OptionBase(Option::MixedDotProduct) { } bool fp16_fp16_fp32 = false; }; struct OptionComputeShaderDerivativesQuad : OptionBase { OptionComputeShaderDerivativesQuad() : OptionBase(Option::ComputeShaderDerivativesQuad) { } bool supports_quad = false; }; struct DescriptorTableEntry { ResourceClass type; uint32_t register_space; uint32_t register_index; uint32_t 
num_descriptors_in_range;
	uint32_t offset_in_heap;
};

struct NodeDispatchGrid
{
	uint32_t offset;
	DXIL::ComponentType component_type;
	uint32_t count;
};

struct NodeInputData
{
	String node_id;
	uint32_t payload_stride;
	DXIL::NodeLaunchType launch_type;
	uint32_t node_array_index;
	NodeDispatchGrid grid_buffer;
	uint32_t broadcast_grid[3];
	uint32_t thread_group_size_spec_id[3];
	uint32_t max_broadcast_grid_spec_id[3];
	uint32_t recursion_factor;
	uint32_t coalesce_factor;
	String node_share_input_id;
	uint32_t node_share_input_array_index;
	uint32_t local_root_arguments_table_index;
	uint32_t is_indirect_bda_stride_program_entry_spec_id;
	uint32_t is_entry_point_spec_id;
	uint32_t dispatch_grid_is_upper_bound_spec_id;
	uint32_t is_static_broadcast_node_spec_id;
	bool dispatch_grid_is_upper_bound;
	bool node_track_rw_input_sharing;
	bool is_program_entry;
};

struct NodeOutputData
{
	String node_id;
	uint32_t node_array_index;
	uint32_t node_array_size;
	uint32_t node_index_spec_constant_id;
	uint32_t max_records;
	bool sparse_array;
};

struct NonSemanticDebugInfo
{
	const char *tag;
	const void *data;
	size_t size;
};

enum class ShaderFeature
{
	Native16BitOperations = 0,
	Count
};

enum class MetaDescriptor
{
	// u32 containing number of descriptors in CBV_SRV_UAV heap.
	// Must be UBOContainingConstant.
	ResourceDescriptorHeapSize = 0,

	// A BDA pointing to first descriptor payload in resource heap.
	// May point to real descriptors, or only UAV counters depending on driver needs.
	// Stride / offset of pointer is determined by Option::PhysicalAddressDescriptorIndexing.
	// Must be UBOContainingBDA or ReadonlySSBO.
	RawDescriptorHeapView = 1,

	// - u16 ViewID;
	// - u16 LayerOffset;
	// Packed into one u32.
	// Must be UBOContainingConstant.
	DynamicViewInstancingOffsets = 2,

	// - u32 ActiveViewIDMask
	// Must be UBOContainingConstant.
	DynamicViewInstancingMask = 3,

	Count
};

enum class MetaDescriptorKind
{
	Invalid,
	// Currently unused, could be extended as needed.
	PushConstant,
	PushBDA,
	// An UBO containing plain constants.
	// May not be backed by a real descriptor, and be hoisted through some special mechanism.
	UBOContainingConstant,
	// An UBO containing a BDA.
	// May not be backed by a real descriptor, and be hoisted through some special mechanism.
	UBOContainingBDA,
	// An SSBO backed by a real descriptor, i.e. OpArrayLength is valid.
	ReadonlySSBO
};

class Converter
{
public:
	Converter(LLVMBCParser &bitcode_parser, LLVMBCParser *bitcode_reflection_parser, SPIRVModule &module);
	~Converter();
	ConvertedFunction convert_entry_point();
	void set_resource_remapping_interface(ResourceRemappingInterface *iface);
	static ShaderStage get_shader_stage(const LLVMBCParser &bitcode_parser, const char *entry = nullptr);
	static void scan_resources(ResourceRemappingInterface *iface, const LLVMBCParser &bitcode_parser);
	static Vector<String> get_entry_points(const LLVMBCParser &parser);
	static bool entry_point_matches(const String &mangled, const char *user);
	void set_entry_point(const char *entry);
	const String &get_compiled_entry_point() const;

	void add_option(const OptionBase &cap);
	static bool recognizes_option(Option cap);

	// These are declared separately since we need to declare a concrete physical buffer layout
	// for local root signature elements which depends on the entire local root signature.
	// It would get somewhat awkward to shoehorn this into the resource "pull" API for normal resources.
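	// Hedged caller-side sketch (values hypothetical): declare the local root signature in
	// full, using the methods below, before conversion so the physical record layout can be computed.
	//   converter.add_local_root_constants(/* space */ 0, /* register */ 0, /* num_words */ 4);
	//   converter.add_local_root_descriptor(ResourceClass::SRV, /* space */ 0, /* register */ 1);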
	void add_local_root_constants(uint32_t register_space, uint32_t register_index, uint32_t num_words);
	void add_local_root_descriptor(ResourceClass type, uint32_t register_space, uint32_t register_index);

	// Local root descriptor tables are special. They must be constructed in such a way that
	// the MSB 32 bits can be ignored and the LSB 32 bits are encoded as Index << SBTDescriptorSizeLog2.
	// Thus, we translate GPU VA to index by a simple shift on the lower 32-bit value.
	void add_local_root_descriptor_table(Vector<DescriptorTableEntry> entries);
	void add_local_root_descriptor_table(const DescriptorTableEntry *entries, size_t count);

	// For debug purposes. Makes it possible to map a computed push constant offset
	// back to corresponding root parameter index.
	// Not needed by codegen, but is used by extended debug info.
	void add_root_parameter_mapping(uint32_t root_parameter_index, uint32_t offset);
	static uint32_t pack_desc_set_binding_to_virtual_offset(uint32_t desc_set, uint32_t binding);

	// For debug purposes. Emits arbitrary data with NonSemantic.dxil-spirv.*.
	// Pointers are owned by application and must remain valid until compilation is done.
	void add_non_semantic_debug_info(const NonSemanticDebugInfo &info);

	void set_patch_location_offset(uint32_t offset);

	// After compilation, query CS workgroup size.
	void get_workgroup_dimensions(uint32_t &x, uint32_t &y, uint32_t &z) const;
	// After compilation, query expected patch size.
	uint32_t get_patch_vertex_count() const;
	uint32_t get_patch_location_offset() const;

	// If any of these are non-zero, a CS may have to be compiled for a specific wave size.
	void get_compute_wave_size_range(uint32_t &min, uint32_t &max, uint32_t &preferred) const;
	// If non-zero, similar to required, but can be ignored. Used as a workaround hint or performance hint.
	uint32_t get_compute_heuristic_min_wave_size() const;
	uint32_t get_compute_heuristic_max_wave_size() const;

	// Returns true if view instancing is enabled and the result can be lowered directly to Vulkan.
	bool is_multiview_compatible() const;

	bool shader_requires_feature(ShaderFeature feature) const;

	// For esoteric CFG workarounds.
	bool get_driver_version(uint32_t &driver_id, uint32_t &driver_version) const;

	static NodeInputData get_node_input(const LLVMBCParser &parser, const char *entry);
	static Vector<NodeOutputData> get_node_outputs(const LLVMBCParser &parser, const char *entry);

	String get_analysis_warnings() const;

	void set_meta_descriptor(MetaDescriptor desc, MetaDescriptorKind kind, uint32_t desc_set,
	                         uint32_t binding_or_push_index);

	struct Impl;

private:
	std::unique_ptr<Impl> impl;
};
} // namespace dxil_spv



================================================
FILE: dxil_extract.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "cli_parser.hpp"
#include "dxil_spirv_c.h"
#include "logging.hpp"
// The original angle-bracket includes were lost in extraction; these are reconstructed
// from usage below (FILE I/O, EXIT_* macros, std::string, std::vector).
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>

using namespace dxil_spv;

static void print_help()
{
	LOGE("dxil-extract [--output file.bc] [--reflection] [--verbose]\n");
}

static std::vector<uint8_t> read_file(const char *path)
{
	FILE *file = fopen(path, "rb");
	if (!file)
		return {};
	fseek(file, 0, SEEK_END);
	auto len = ftell(file);
	rewind(file);
	std::vector<uint8_t> result(len);
	if (fread(result.data(), 1, len, file) != size_t(len))
	{
		fclose(file);
		return {};
	}
	fclose(file);
	return result;
}

static bool write_file(const char *path, const void *data, size_t size)
{
	bool ret = true;
	FILE *file = fopen(path, "wb");
	if (!file)
		return false;
	if (fwrite(data, 1, size, file) != size)
	{
		LOGE("Failed to write LLVM BC.\n");
		ret = false;
	}
	fclose(file);
	return ret;
}

static uint64_t vkd3d_proton_hash_fnv1(const void *data_, size_t size)
{
	auto *data = static_cast<const uint8_t *>(data_);
	uint64_t h = 0xcbf29ce484222325ull;
	for (size_t i = 0; i < size; i++)
		h = (h * 0x100000001b3ull) ^ data[i];
	return h;
}

int main(int argc, char **argv)
{
	std::string input, output;
	bool reflection = false;
	bool verbose = false;

	CLICallbacks cbs;
	cbs.add("--help", [](CLIParser &parser) {
		print_help();
		parser.end();
	});
	cbs.add("--output", [&](CLIParser &parser) { output = parser.next_string(); });
	cbs.add("--reflection", [&](CLIParser &) { reflection = true; });
	cbs.add("--verbose", [&](CLIParser &) { verbose = true; });
	cbs.default_handler = [&](const char *arg) { input = arg; };
	CLIParser parser(std::move(cbs), argc - 1, argv + 1);
	if (!parser.parse())
		return EXIT_FAILURE;
	else if (parser.is_ended_state())
		return EXIT_SUCCESS;

	if (input.empty())
	{
		LOGE("Need input file.\n");
		return EXIT_FAILURE;
	}

	auto input_file = read_file(input.c_str());
	if (input_file.empty())
	{
		LOGE("Failed to read file %s.\n", input.c_str());
		return EXIT_FAILURE;
	}

	dxil_spv_parsed_blob blob;
	if (reflection)
	{
		dxil_spv_result result;
		if ((result = dxil_spv_parse_reflection_dxil_blob(input_file.data(), input_file.size(), &blob)) !=
		    DXIL_SPV_SUCCESS)
		{
			// Fallback in case there is no STAT block.
			if (result == DXIL_SPV_ERROR_NO_DATA)
			{
				LOGW("There is no STAT block, falling back to normal DXIL block.\n");
				result = dxil_spv_parse_dxil_blob(input_file.data(), input_file.size(), &blob);
			}

			if (result != DXIL_SPV_SUCCESS)
			{
				LOGE("Failed to parse blob.\n");
				return EXIT_FAILURE;
			}
		}
	}
	else
	{
		if (dxil_spv_parse_dxil_blob(input_file.data(), input_file.size(), &blob) != DXIL_SPV_SUCCESS)
		{
			LOGE("Failed to parse blob.\n");
			return EXIT_FAILURE;
		}
	}

	if (verbose)
	{
		printf("=== %s ===\n", input.c_str());
		unsigned entry_point_count = 0;
		dxil_spv_parsed_blob_get_num_entry_points(blob, &entry_point_count);
		for (unsigned i = 0; i < entry_point_count; i++)
		{
			const char *demangled = nullptr;
			dxil_spv_parsed_blob_get_entry_point_demangled_name(blob, i, &demangled);
			printf(" %s\n", demangled);
		}
		printf("vkd3d-proton hash: %016llx\n",
		       static_cast<unsigned long long>(vkd3d_proton_hash_fnv1(input_file.data(), input_file.size())));
		printf("==================\n");
	}

	const void *ir_data;
	size_t ir_size;
	if (dxil_spv_parsed_blob_get_raw_ir(blob, &ir_data, &ir_size) != DXIL_SPV_SUCCESS)
	{
		LOGE("Failed to extract raw IR.\n");
		return EXIT_FAILURE;
	}

	if (output.empty())
	{
		dxil_spv_parsed_blob_dump_llvm_ir(blob);
		dxil_spv_parsed_blob_free(blob);
		return EXIT_SUCCESS;
	}

	if (!write_file(output.c_str(), ir_data, ir_size))
	{
		LOGE("Failed to write IR to %s.\n", output.c_str());
		return EXIT_FAILURE;
	}

	dxil_spv_parsed_blob_free(blob);
}



================================================
FILE: dxil_parser.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "dxil_parser.hpp"
#include "dxil.hpp"
#include "memory_stream.hpp"
#include "logging.hpp"
// Angle-bracket includes were lost in extraction; reconstructed from usage
// (strchr, offsetof, std::move).
#include <cstring>
#include <cstddef>
#include <utility>

namespace dxil_spv
{
bool is_mangled_entry_point(const char *user)
{
	// The mangling algorithm is intentionally left undefined in spec.
	// However, the mangling scheme clearly follows MSVC here.
	// The format we're looking for is:
	// ?<entry point name>@<mangled signature>.
	// http://www.agner.org/optimize/calling_conventions.pdf (section 8.1).
	// DXC also seems to start with '\01', but we can ignore that.
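	// Worked example (hedged, name hypothetical): "\01?MyMiss@@YAXXZ" contains '?'
	// followed by "MyMiss" and then '@', so this function returns true, and
	// demangle_entry_point() below extracts "MyMiss".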
const char *mangle_begin = strchr(user, '?'); if (!mangle_begin) return false; const char *mangle_end = strchr(mangle_begin + 1, '@'); return mangle_end != nullptr; } String demangle_entry_point(const String &entry) { auto start_idx = entry.find_first_of('?'); if (start_idx == std::string::npos) return entry; start_idx++; auto end_idx = entry.find_first_of('@', start_idx); if (end_idx == std::string::npos) return entry; return entry.substr(start_idx, end_idx - start_idx); } Vector &DXILContainerParser::get_blob() { return dxil_blob; } Vector &DXILContainerParser::get_rdat_subobjects() { return rdat_subobjects; } bool DXILContainerParser::parse_dxil(MemoryStream &stream) { DXIL::ProgramHeader program_header; if (!stream.read(program_header)) return false; if (static_cast(program_header.dxil_magic) != DXIL::FourCC::DXIL) return false; constexpr uint32_t DxilMagicPad = sizeof(DXIL::ProgramHeader) - offsetof(DXIL::ProgramHeader, dxil_magic); if (program_header.bitcode_offset < DxilMagicPad) return false; auto substream = stream.create_substream_bitcode_size( stream.get_offset() + program_header.bitcode_offset - DxilMagicPad, program_header.bitcode_size); dxil_blob.resize(substream.get_size()); if (!substream.read(dxil_blob.data(), substream.get_size())) return false; return true; } bool DXILContainerParser::parse_iosg1(MemoryStream &stream, Vector &elements) { uint32_t element_count; if (!stream.read(element_count)) return false; if (!stream.skip(sizeof(uint32_t))) return false; elements.resize(element_count); for (uint32_t i = 0; i < element_count; i++) { if (!stream.read(elements[i].stream_index)) return false; uint32_t string_offset; if (!stream.read(string_offset)) return false; if (!stream.read(elements[i].semantic_index)) return false; if (!stream.read(elements[i].system_value_semantic)) return false; if (!stream.read(elements[i].component_type)) return false; if (!stream.read(elements[i].register_index)) return false; if (!stream.read(elements[i].mask)) return false; if (!stream.read(elements[i].min_precision)) return false; size_t offset = stream.get_offset(); if (!stream.seek(string_offset)) return false; const char *semantic_name; if (!stream.map_string_iterate(semantic_name)) return false; elements[i].semantic_name = semantic_name; if (!stream.seek(offset)) return false; } return true; } bool DXILContainerParser::parse_rdat(MemoryStream &stream) { uint32_t version, part_count; if (!stream.read(version)) return false; if (!stream.read(part_count)) return false; constexpr uint32_t RDAT_Version = 0x10; if (version != RDAT_Version) return false; Vector offsets(part_count); for (uint32_t i = 0; i < part_count; i++) if (!stream.read(offsets[i])) return false; MemoryStream string_buffer; MemoryStream index_buffer; MemoryStream raw_bytes; for (uint32_t i = 0; i < part_count; i++) { if (offsets[i] + 2 * sizeof(uint32_t) > stream.get_size()) return false; uint32_t part_size = i + 1 < part_count ? 
(offsets[i + 1] - offsets[i]) : uint32_t(stream.get_size() - offsets[i]); auto substream = stream.create_substream(offsets[i], part_size); DXIL::RuntimeDataPartType type; if (!substream.read(type)) return false; uint32_t subpart_length; if (!substream.read(subpart_length)) return false; if (subpart_length + 2 * sizeof(uint32_t) > substream.get_size()) return false; switch (type) { case DXIL::RuntimeDataPartType::StringBuffer: { string_buffer = substream.create_substream(substream.get_offset(), subpart_length); break; } case DXIL::RuntimeDataPartType::IndexArrays: { index_buffer = substream.create_substream(substream.get_offset(), subpart_length); break; } case DXIL::RuntimeDataPartType::RawBytes: { raw_bytes = substream.create_substream(substream.get_offset(), subpart_length); break; } case DXIL::RuntimeDataPartType::SubobjectTable: { uint32_t record_count; uint32_t record_stride; if (!substream.read(record_count)) return false; if (!substream.read(record_stride)) return false; for (unsigned record = 0; record < record_count; record++) { auto record_stream = substream.create_substream(substream.get_offset() + record * record_stride, record_stride); DXIL::SubobjectKind kind; if (!record_stream.read(kind)) return false; switch (kind) { case DXIL::SubobjectKind::StateObjectConfig: { uint32_t name_offset; if (!record_stream.read(name_offset)) return false; const char *str = nullptr; if (!string_buffer.map_string_absolute(str, name_offset)) return false; uint32_t flag; if (!record_stream.read(flag)) return false; RDATSubobject elem = {}; elem.kind = kind; elem.subobject_name = str; elem.args[0] = flag; rdat_subobjects.push_back(std::move(elem)); break; } case DXIL::SubobjectKind::RaytracingShaderConfig: { uint32_t name_offset; if (!record_stream.read(name_offset)) return false; const char *str; if (!string_buffer.map_string_absolute(str, name_offset)) return false; uint32_t max_payload_size, max_attribute_size; if (!record_stream.read(max_payload_size)) return false; if (!record_stream.read(max_attribute_size)) return false; RDATSubobject elem = {}; elem.kind = kind; elem.subobject_name = str; elem.args[0] = max_payload_size; elem.args[1] = max_attribute_size; rdat_subobjects.push_back(std::move(elem)); break; } case DXIL::SubobjectKind::RaytracingPipelineConfig: case DXIL::SubobjectKind::RaytracingPipelineConfig1: { uint32_t name_offset; if (!record_stream.read(name_offset)) return false; const char *str; if (!string_buffer.map_string_absolute(str, name_offset)) return false; uint32_t max_recursion_depth; uint32_t flags = 0; if (!record_stream.read(max_recursion_depth)) return false; if (kind == DXIL::SubobjectKind::RaytracingPipelineConfig1) if (!record_stream.read(flags)) return false; RDATSubobject elem = {}; elem.kind = kind; elem.subobject_name = str; elem.args[0] = max_recursion_depth; elem.args[1] = flags; rdat_subobjects.push_back(std::move(elem)); break; } case DXIL::SubobjectKind::HitGroup: { uint32_t name_offset; if (!record_stream.read(name_offset)) return false; const char *hg_name; if (!string_buffer.map_string_absolute(hg_name, name_offset)) return false; DXIL::HitGroupType hit_group_type; if (!record_stream.read(hit_group_type)) return false; uint32_t ahit_name_offset, chit_name_offset, intersection_name_offset; if (!record_stream.read(ahit_name_offset)) return false; if (!record_stream.read(chit_name_offset)) return false; if (!record_stream.read(intersection_name_offset)) return false; const char *ahit, *chit, *intersection; if (!string_buffer.map_string_absolute(ahit, 
				case DXIL::SubobjectKind::HitGroup:
				{
					uint32_t name_offset;
					if (!record_stream.read(name_offset))
						return false;
					const char *hg_name;
					if (!string_buffer.map_string_absolute(hg_name, name_offset))
						return false;
					DXIL::HitGroupType hit_group_type;
					if (!record_stream.read(hit_group_type))
						return false;
					uint32_t ahit_name_offset, chit_name_offset, intersection_name_offset;
					if (!record_stream.read(ahit_name_offset))
						return false;
					if (!record_stream.read(chit_name_offset))
						return false;
					if (!record_stream.read(intersection_name_offset))
						return false;
					const char *ahit, *chit, *intersection;
					if (!string_buffer.map_string_absolute(ahit, ahit_name_offset))
						return false;
					if (!string_buffer.map_string_absolute(chit, chit_name_offset))
						return false;
					if (!string_buffer.map_string_absolute(intersection, intersection_name_offset))
						return false;

					RDATSubobject elem = {};
					elem.kind = kind;
					elem.subobject_name = hg_name;
					elem.hit_group_type = hit_group_type;
					elem.exports = { ahit, chit, intersection };
					rdat_subobjects.push_back(std::move(elem));
					break;
				}

				case DXIL::SubobjectKind::SubobjectToExportsAssociation:
				{
					RDATSubobject elem = {};
					elem.kind = kind;

					uint32_t name_offset;
					if (!record_stream.read(name_offset))
						return false;
					const char *name;
					if (!string_buffer.map_string_absolute(name, name_offset))
						return false;
					elem.subobject_name = name;

					if (!record_stream.read(name_offset))
						return false;
					const char *object_name;
					if (!string_buffer.map_string_absolute(object_name, name_offset))
						return false;
					elem.exports.push_back(object_name);

					uint32_t index_offset;
					if (!record_stream.read(index_offset))
						return false;
					auto index_substream = index_buffer.create_substream(sizeof(uint32_t) * index_offset);
					uint32_t count;
					if (!index_substream.read(count))
						return false;
					for (uint32_t export_index = 0; export_index < count; export_index++)
					{
						if (!index_substream.read(name_offset))
							return false;
						if (!string_buffer.map_string_absolute(object_name, name_offset))
							return false;
						elem.exports.push_back(object_name);
					}

					rdat_subobjects.push_back(std::move(elem));
					break;
				}

				case DXIL::SubobjectKind::GlobalRootSignature:
				case DXIL::SubobjectKind::LocalRootSignature:
				{
					uint32_t name_offset;
					if (!record_stream.read(name_offset))
						return false;
					const char *name;
					if (!string_buffer.map_string_absolute(name, name_offset))
						return false;
					uint32_t byte_offset;
					uint32_t byte_size;
					if (!record_stream.read(byte_offset))
						return false;
					if (!record_stream.read(byte_size))
						return false;
					auto name_substream = raw_bytes.create_substream(byte_offset, byte_size);
					auto *data = name_substream.map_read(byte_size);

					RDATSubobject elem = {};
					elem.kind = kind;
					elem.subobject_name = name;
					elem.payload = data;
					elem.payload_size = byte_size;
					rdat_subobjects.push_back(std::move(elem));
					break;
				}

				default:
					break;
				}
			}
			break;
		}

		default:
			break;
		}
	}

	return true;
}
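// Editor's note (sketch, not upstream code): parse_container() below walks the
// standard DXBC/DXIL container layout, a FourCC-tagged part list: one
// ContainerHeader, then part_count uint32_t offsets, each pointing at a
// PartHeader { part_fourcc, part_size } followed by the part payload.
#if 0
static void dump_part_fourccs(const void *data, size_t size)
{
	MemoryStream stream(data, size);
	DXIL::ContainerHeader header;
	if (!stream.read(header))
		return;
	Vector<uint32_t> offsets(header.part_count);
	for (auto &offset : offsets)
		if (!stream.read(offset))
			return;
	for (auto &offset : offsets)
	{
		DXIL::PartHeader part;
		if (!stream.seek(offset) || !stream.read(part))
			return;
		// part_fourcc is four ASCII characters packed into a uint32_t.
		LOGE("Part 0x%08x, %u bytes\n", unsigned(part.part_fourcc), unsigned(part.part_size));
	}
}
#endif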
bool DXILContainerParser::parse_container(const void *data, size_t size, bool reflection)
{
	MemoryStream stream(data, size);

	DXIL::ContainerHeader container_header;
	if (!stream.read(container_header))
		return false;
	if (static_cast<DXIL::FourCC>(container_header.header_fourcc) != DXIL::FourCC::Container)
		return false;
	if (container_header.container_size_in_bytes > size)
		return false;

	Vector<uint32_t> parts(container_header.part_count);
	for (uint32_t i = 0; i < container_header.part_count; i++)
	{
		if (!stream.read(parts[i]))
			return false;
	}

	for (auto &part_offset : parts)
	{
		if (!stream.seek(part_offset))
			return false;
		DXIL::PartHeader part_header;
		if (!stream.read(part_header))
			return false;
		auto fourcc = static_cast<DXIL::FourCC>(part_header.part_fourcc);
		if (fourcc == DXIL::FourCC::SHDR || fourcc == DXIL::FourCC::SHEX)
			dxbc_binary = true;
	}

	for (auto &part_offset : parts)
	{
		if (!stream.seek(part_offset))
			return false;
		DXIL::PartHeader part_header;
		if (!stream.read(part_header))
			return false;

		auto fourcc = static_cast<DXIL::FourCC>(part_header.part_fourcc);
		switch (fourcc)
		{
		case DXIL::FourCC::DXIL:
		case DXIL::FourCC::ShaderStatistics:
		{
			DXIL::FourCC expected = reflection ? DXIL::FourCC::ShaderStatistics : DXIL::FourCC::DXIL;
			if (expected != fourcc || dxbc_binary)
				break;
			// The STAT block includes a DXIL blob that is literally the same DXIL IR,
			// minus code, plus string names in the metadata chunks.
			auto substream = stream.create_substream(stream.get_offset(), part_header.part_size);
			if (!parse_dxil(substream))
				return false;
			break;
		}

		case DXIL::FourCC::FeatureInfo:
			break;

		case DXIL::FourCC::InputSignature:
		{
			auto substream = stream.create_substream(stream.get_offset(), part_header.part_size);
			if (!parse_iosg1(substream, input_elements))
				return false;
			break;
		}

		case DXIL::FourCC::OutputSignature:
		{
			auto substream = stream.create_substream(stream.get_offset(), part_header.part_size);
			if (!parse_iosg1(substream, output_elements))
				return false;
			break;
		}

		case DXIL::FourCC::PatchConstantSignature:
			break;
		case DXIL::FourCC::PrivateData:
			break;
		case DXIL::FourCC::RootSignature:
			break;
		case DXIL::FourCC::PipelineStateValidation:
			break;
		case DXIL::FourCC::ResourceDef:
			break;
		case DXIL::FourCC::ShaderHash:
			break;

		case DXIL::FourCC::RuntimeData:
		{
			auto substream = stream.create_substream(stream.get_offset(), part_header.part_size);
			if (!parse_rdat(substream))
				return false;
			break;
		}

		default:
			break;
		}
	}

	return true;
}
} // namespace dxil_spv


================================================
FILE: dxil_parser.hpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include "thread_local_allocator.hpp"
#include "dxil.hpp"
#include <stddef.h>
#include <stdint.h>

namespace dxil_spv
{
class MemoryStream;

struct RDATSubobject
{
	// All strings point directly to the DXBC blob and the pointers are not owned.
	DXIL::SubobjectKind kind;
	// All subobjects have a variable name as declared in the shader.
	const char *subobject_name;
	// All exports.
	// For hit groups, 3 strings: AnyHit, ClosestHit, Intersection. Strings may be empty if not used.
	// For SubobjectToExportsAssociation: N strings. exports[0] is associated with the following exports.
	DXIL::HitGroupType hit_group_type;
	Vector<const char *> exports;
	// For StateObjectConfig, RaytracingShaderConfig, RaytracingPipelineConfig(1).
	// Each element is in struct order.
	uint32_t args[2];
	// For Global/Local Root Signatures.
	const uint8_t *payload;
	size_t payload_size;
};

class DXILContainerParser
{
public:
	bool parse_container(const void *data, size_t size, bool reflection);
	Vector<uint8_t> &get_blob();
	Vector<RDATSubobject> &get_rdat_subobjects();
	bool is_dxbc_binary() const
	{
		return dxbc_binary;
	}

private:
	Vector<uint8_t> dxil_blob;
	Vector<IOSGElement> input_elements;
	Vector<IOSGElement> output_elements;
	Vector<RDATSubobject> rdat_subobjects;
	bool dxbc_binary = false;

	bool parse_dxil(MemoryStream &stream);
	bool parse_iosg1(MemoryStream &stream, Vector<IOSGElement> &elements);
	bool parse_rdat(MemoryStream &stream);
};

bool is_mangled_entry_point(const char *user);
String demangle_entry_point(const String &entry);
} // namespace dxil_spv
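// Example (editor sketch): typical use of DXILContainerParser from client
// code. The surrounding function is hypothetical, not part of this repository.
#if 0
static bool extract_bitcode(const void *dxbc, size_t size, dxil_spv::Vector<uint8_t> &bitcode)
{
	dxil_spv::DXILContainerParser parser;
	// reflection = false picks the DXIL part; true picks the STAT part, which
	// carries the same IR with reflection names preserved.
	if (!parser.parse_container(dxbc, size, false))
		return false;
	bitcode = parser.get_blob(); // Raw LLVM bitcode, ready for the BC parser.
	return !bitcode.empty();
}
#endif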


================================================
FILE: dxil_spirv.cpp
================================================
/* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include <algorithm>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <vector>
#define DXIL_SPV_ENABLE_EXPERIMENTAL_MULTIVIEW
#include "dxil_spirv_c.h"
#include "cli_parser.hpp"
#include "logging.hpp"
#include "spirv-tools/libspirv.hpp"
#include "spirv_cross_c.h"

using namespace dxil_spv;

static std::string convert_to_asm(const void *code, size_t size)
{
	spvtools::SpirvTools tools(SPV_ENV_VULKAN_1_3);
	tools.SetMessageConsumer([](spv_message_level_t, const char *, const spv_position_t &, const char *message) {
		LOGE("SPIRV-Tools message: %s\n", message);
	});
	std::string str;
	if (!tools.Disassemble(static_cast<const uint32_t *>(code), size / sizeof(uint32_t), &str, 0))
		return "";
	else
		return str;
}

static bool validate_spirv(const void *code, size_t size)
{
	spvtools::SpirvTools tools(SPV_ENV_VULKAN_1_3);
	bool expected_failure = false;
	bool unexpected_failure = false;
	tools.SetMessageConsumer([&](spv_message_level_t, const char *, const spv_position_t &, const char *message) {
		if (strstr(message, "08721") || strstr(message, "08722"))
		{
			LOGW("SPIRV-Tools message expected failure: %s\n", message);
			expected_failure = true;
		}
		else
		{
			LOGE("SPIRV-Tools message: %s\n", message);
			unexpected_failure = true;
		}
	});
	spvtools::ValidatorOptions opts;
	opts.SetScalarBlockLayout(true);
	return tools.Validate(static_cast<const uint32_t *>(code), size / sizeof(uint32_t), opts) ||
	       (expected_failure && !unexpected_failure);
}

static std::string convert_to_glsl(const void *code, size_t size)
{
	std::string ret;
	spvc_context context;
	if (spvc_context_create(&context) != SPVC_SUCCESS)
		return ret;

	spvc_parsed_ir ir;
	if (spvc_context_parse_spirv(context, static_cast<const SpvId *>(code), size / sizeof(uint32_t), &ir) != SPVC_SUCCESS)
		goto cleanup;

	spvc_compiler compiler;
	if (spvc_context_create_compiler(context, SPVC_BACKEND_GLSL, ir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &compiler) != SPVC_SUCCESS)
		goto cleanup;

	spvc_compiler_options opts;
	if (spvc_compiler_create_compiler_options(compiler, &opts) != SPVC_SUCCESS)
		goto cleanup;
	spvc_compiler_options_set_bool(opts, SPVC_COMPILER_OPTION_GLSL_ES, SPVC_FALSE);
	spvc_compiler_options_set_uint(opts, SPVC_COMPILER_OPTION_GLSL_VERSION, 460);
	spvc_compiler_options_set_bool(opts, SPVC_COMPILER_OPTION_GLSL_VULKAN_SEMANTICS, SPVC_TRUE);
	spvc_compiler_install_compiler_options(compiler, opts);

	const char *source;
	if (spvc_compiler_compile(compiler, &source) != SPVC_SUCCESS)
		goto cleanup;
	ret = source;

cleanup:
	spvc_context_destroy(context);
	return ret;
}

static std::vector<uint8_t> read_file(const char *path)
{
	FILE *file = fopen(path, "rb");
	if (!file)
		return {};

	fseek(file, 0, SEEK_END);
	auto len = ftell(file);
	rewind(file);
	std::vector<uint8_t> result(len);
	if (fread(result.data(), 1, len, file) != size_t(len))
	{
		fclose(file);
		return {};
	}
	fclose(file);
	return result;
}
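// Editor sketch (hypothetical driver, illustrative only): how the static
// helpers above compose into a validate-then-disassemble pipeline, which is
// what main() does further down.
#if 0
static bool show_disassembly(const std::vector<uint8_t> &spirv)
{
	if (!validate_spirv(spirv.data(), spirv.size()))
		return false;
	std::string text = convert_to_asm(spirv.data(), spirv.size());
	printf("%s\n", text.c_str());
	return !text.empty();
}
#endif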
"\t[--typed-uav-read-without-format]\n" "\t[--bindless-typed-buffer-offsets]\n" "\t[--output-rt-swizzle index xyzw]\n" "\t[--bindless-offset-buffer-layout ]\n" "\t[--storage-input-output-16bit]\n" "\t[--root-descriptor ]\n" "\t[--descriptor-qa ]\n" "\t[--instruction-instrumentation ]\n" "\t[--min-precision-native-16bit]\n" "\t[--raw-llvm]\n" "\t[--use-reflection-names]\n" "\t[--invariant-position]\n" "\t[--robust-physical-cbv-load]\n" "\t[--allow-arithmetic-relaxed-precision]\n" "\t[--physical-address-descriptor-indexing ]\n" "\t[--nvapi ]\n" "\t[--subgroup-partitioned-nv]\n" "\t[--dead-code-eliminate]\n" "\t[--propagate-precise]\n" "\t[--force-precise]\n" "\t[--force-flatten]\n" "\t[--force-loop]\n" "\t[--force-branch]\n" "\t[--force-unroll]\n" "\t[--subgroup-size minimum maximum]\n" "\t[--descriptor-heap-robustness]\n" "\t[--no-compute-shader-derivatives]\n" "\t[--quad-control-maximal-reconvergence]\n" "\t[--force-maximal-reconvergence]\n" "\t[--raw-access-chains-nv]\n" "\t[--extended-robustness]\n" "\t[--vkmm]\n" "\t[--full-wmma ]\n" "\t[--shader-quirk ]\n" "\t[--non-semantic]\n" "\t[--mixed-float-dot-product]\n" "\t[--view-instancing]\n" "\t[--view-instancing-last-pre-rasterization-stage]\n" "\t[--view-instance-to-viewport-spec-id ]\n" "\t[--view-index-to-view-instance-spec-id ]\n" "\t[--meta-descriptor descriptor kind set binding]\n"); } struct MetaDescriptor { dxil_spv_meta_descriptor meta; dxil_spv_meta_descriptor_kind kind; uint32_t desc_set; uint32_t desc_binding; }; struct Arguments { std::string input_path; std::string output_path; std::string entry_point; bool dump_module = false; bool glsl = false; bool emit_asm = false; bool validate = false; bool shader_demote = false; bool shader_i8_dot = false; bool dual_source_blending = false; bool debug_all_entry_points = false; bool storage_input_output_16bit = false; std::vector swizzles; unsigned root_constant_inline_ubo_desc_set = 0; unsigned root_constant_inline_ubo_binding = 0; bool root_constant_inline_ubo = false; bool bindless_cbv_as_ssbo = false; bool typed_uav_read_without_format = false; bool bindless_typed_buffer_offsets = false; bool min_precision_native_16bit = false; bool raw_llvm = false; bool use_reflection_names = false; bool invariant_position = false; bool robust_physical_cbv_load = false; bool allow_arithmetic_relaxed_precision = false; bool subgroup_partitioned_nv = false; bool dead_code_eliminate = false; bool propagate_precise = false; bool force_precise = false; bool opacity_micromap = false; bool force_flatten = false; bool force_loop = false; bool force_branch = false; bool force_unroll = false; bool descriptor_heap_robustness = false; bool compute_shader_derivatives = true; bool quad_control_maximal_reconvergence = false; bool force_maximal_reconvergence = false; bool raw_access_chains_nv = false; bool extended_robustness = false; bool vkmm = false; bool wmma_fp8 = false; bool wmma_nv_coopmat2 = false; bool non_semantic = false; bool mixed_float_dot_product = false; std::vector quirks; unsigned ssbo_alignment = 1; unsigned physical_address_indexing_stride = 1; unsigned physical_address_indexing_offset = 0; unsigned subgroup_size_minimum = 4; unsigned subgroup_size_maximum = 128; bool descriptor_qa = false; uint32_t descriptor_qa_set = 0; uint32_t descriptor_qa_binding = 0; bool nvapi = false; unsigned nvapi_register_index = 0; unsigned nvapi_register_space = 0; bool instruction_instrumentation = false; uint32_t instruction_instrumentation_set = 0; uint32_t instruction_instrumentation_binding = 0; 
struct Remapper
{
	struct RootConstant
	{
		unsigned register_space;
		unsigned register_index;
		unsigned word_offset;
	};

	struct RootDescriptor
	{
		dxil_spv_resource_class resource_class;
		uint32_t space;
		uint32_t register_index;
	};

	std::vector<RootConstant> root_constants;
	unsigned root_constant_word_count = 0;
	std::vector<RootDescriptor> root_descriptors;

	struct VertexInput
	{
		std::string semantic;
		unsigned index;
	};
	std::vector<VertexInput> vertex_inputs;

	struct StreamOutput
	{
		std::string semantic;
		unsigned index;
		unsigned offset;
		unsigned stride;
		unsigned buffer_index;
	};
	std::vector<StreamOutput> stream_outputs;

	bool bindless = false;
	bool bda = true;
	bool uav_counter_force_texel_buffer = false;
	bool uav_counter_force_ssbo = false;
	bool ssbo_uav = false;
	bool ssbo_srv = false;
	bool ssbo_rtas = false;
	bool input_attachments = false;
};

static bool kind_is_buffer(dxil_spv_resource_kind kind)
{
	return kind == DXIL_SPV_RESOURCE_KIND_RAW_BUFFER ||
	       kind == DXIL_SPV_RESOURCE_KIND_STRUCTURED_BUFFER ||
	       kind == DXIL_SPV_RESOURCE_KIND_TYPED_BUFFER;
}

static int32_t find_root_descriptor_index(const Remapper *remapper, const dxil_spv_d3d_binding *binding,
                                          dxil_spv_resource_class resource_class)
{
	auto itr = std::find_if(remapper->root_descriptors.begin(), remapper->root_descriptors.end(),
	                        [&](const Remapper::RootDescriptor &desc) {
		                        return desc.resource_class == resource_class &&
		                               desc.space == binding->register_space &&
		                               desc.register_index == binding->register_index;
	                        });

	if (itr != remapper->root_descriptors.end())
		return int32_t(itr - remapper->root_descriptors.begin());
	else
		return -1;
}

static bool d3d_binding_is_global_heap(const dxil_spv_d3d_binding &binding)
{
	return binding.register_index == UINT32_MAX &&
	       binding.register_space == UINT32_MAX &&
	       binding.range_size == UINT32_MAX;
}
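// Editor's note (derived from the check above): a "global heap" binding
// appears to correspond to SM 6.6 direct descriptor heap access
// (ResourceDescriptorHeap[] / SamplerDescriptorHeap[]); no register range
// exists, so all register fields are UINT32_MAX. Hypothetical illustration:
#if 0
static void global_heap_example()
{
	dxil_spv_d3d_binding binding = {};
	binding.register_index = UINT32_MAX;
	binding.register_space = UINT32_MAX;
	binding.range_size = UINT32_MAX;
	assert(d3d_binding_is_global_heap(binding));
}
#endif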
static dxil_spv_bool remap_srv(void *userdata, const dxil_spv_d3d_binding *binding,
                               dxil_spv_srv_vulkan_binding *vk_binding)
{
	auto *remapper = static_cast<Remapper *>(userdata);
	*vk_binding = {};

	int32_t desc_index = find_root_descriptor_index(remapper, binding, DXIL_SPV_RESOURCE_CLASS_SRV);
	if (desc_index >= 0)
	{
		vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_BUFFER_DEVICE_ADDRESS;
		vk_binding->buffer_binding.root_constant_index = uint32_t(desc_index);
	}
	else
	{
		bool is_global_heap = d3d_binding_is_global_heap(*binding);
		if (is_global_heap)
		{
			vk_binding->buffer_binding.bindless.use_heap = DXIL_SPV_TRUE;
			vk_binding->buffer_binding.set = 0;
			vk_binding->buffer_binding.binding = 0;
		}
		else if (remapper->bindless)
		{
			vk_binding->buffer_binding.bindless.use_heap = DXIL_SPV_TRUE;
			vk_binding->buffer_binding.bindless.heap_root_offset = binding->register_index;
			vk_binding->buffer_binding.root_constant_index = kind_is_buffer(binding->kind) ? 1 : 0;
			vk_binding->buffer_binding.set = kind_is_buffer(binding->kind) ? 1 : 0;
			vk_binding->buffer_binding.binding = 0;
		}
		else
		{
			vk_binding->buffer_binding.bindless.use_heap = DXIL_SPV_FALSE;
			vk_binding->buffer_binding.set = binding->register_space;
			vk_binding->buffer_binding.binding = binding->register_index;
		}

		if (binding->kind == DXIL_SPV_RESOURCE_KIND_RT_ACCELERATION_STRUCTURE)
			if ((remapper->bindless || is_global_heap) && remapper->ssbo_rtas)
				vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_SSBO;

		if (remapper->input_attachments &&
		    (binding->register_space == 1000 || binding->register_space == 1001) &&
		    (binding->kind == DXIL_SPV_RESOURCE_KIND_TEXTURE_2D ||
		     binding->kind == DXIL_SPV_RESOURCE_KIND_TEXTURE_2DMS))
		{
			vk_binding->buffer_binding.bindless.use_heap = DXIL_SPV_FALSE;
			vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
			vk_binding->buffer_binding.input_attachment_index =
			    binding->register_space == 1000 ? binding->register_index : -1u;
		}

		if (remapper->ssbo_srv)
		{
			if (binding->kind == DXIL_SPV_RESOURCE_KIND_STRUCTURED_BUFFER ||
			    binding->kind == DXIL_SPV_RESOURCE_KIND_RAW_BUFFER)
			{
				vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_SSBO;
			}
		}

		// In case it's needed, place offset buffer here.
		vk_binding->offset_binding.set = 15;
		vk_binding->offset_binding.binding = 0;
	}

	return DXIL_SPV_TRUE;
}

static dxil_spv_bool remap_sampler(void *userdata, const dxil_spv_d3d_binding *binding,
                                   dxil_spv_vulkan_binding *vk_binding)
{
	auto *remapper = static_cast<Remapper *>(userdata);
	*vk_binding = {};

	if (d3d_binding_is_global_heap(*binding))
	{
		vk_binding->bindless.use_heap = DXIL_SPV_TRUE;
		vk_binding->set = 0;
		vk_binding->binding = 0;
	}
	else if (remapper->bindless)
	{
		vk_binding->bindless.use_heap = DXIL_SPV_TRUE;
		vk_binding->bindless.heap_root_offset = binding->register_index;
		vk_binding->root_constant_index = 2;
		vk_binding->set = 2;
		vk_binding->binding = 0;
	}
	else
	{
		vk_binding->bindless.use_heap = DXIL_SPV_FALSE;
		vk_binding->set = binding->register_space;
		vk_binding->binding = binding->register_index;
	}

	return DXIL_SPV_TRUE;
}
static dxil_spv_bool remap_uav(void *userdata, const dxil_spv_uav_d3d_binding *binding,
                               dxil_spv_uav_vulkan_binding *vk_binding)
{
	auto *remapper = static_cast<Remapper *>(userdata);
	*vk_binding = {};

	int32_t desc_index = find_root_descriptor_index(remapper, &binding->d3d_binding, DXIL_SPV_RESOURCE_CLASS_UAV);
	if (desc_index >= 0)
	{
		vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_BUFFER_DEVICE_ADDRESS;
		vk_binding->buffer_binding.root_constant_index = uint32_t(desc_index);
	}
	else
	{
		bool binding_is_global_heap = d3d_binding_is_global_heap(binding->d3d_binding);
		if (binding_is_global_heap)
		{
			vk_binding->buffer_binding.bindless.use_heap = DXIL_SPV_TRUE;
			vk_binding->buffer_binding.set = 0;
			vk_binding->buffer_binding.binding = 0;
		}
		else if (remapper->bindless)
		{
			vk_binding->buffer_binding.bindless.use_heap = DXIL_SPV_TRUE;
			vk_binding->buffer_binding.bindless.heap_root_offset = binding->d3d_binding.register_index;
			vk_binding->buffer_binding.root_constant_index = kind_is_buffer(binding->d3d_binding.kind) ? 4 : 3;
			vk_binding->buffer_binding.set = kind_is_buffer(binding->d3d_binding.kind) ? 4 : 3;
			vk_binding->buffer_binding.binding = 0;
		}
		else
		{
			vk_binding->buffer_binding.bindless.use_heap = DXIL_SPV_FALSE;
			vk_binding->buffer_binding.set = binding->d3d_binding.register_space;
			vk_binding->buffer_binding.binding = binding->d3d_binding.register_index;
		}

		if (remapper->ssbo_uav)
		{
			if (binding->d3d_binding.kind == DXIL_SPV_RESOURCE_KIND_STRUCTURED_BUFFER ||
			    binding->d3d_binding.kind == DXIL_SPV_RESOURCE_KIND_RAW_BUFFER)
			{
				vk_binding->buffer_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_SSBO;
			}
		}

		vk_binding->offset_binding.set = 15;
		vk_binding->offset_binding.binding = 0;

		if (binding->has_counter)
		{
			if (remapper->bindless || binding_is_global_heap)
			{
				vk_binding->counter_binding.bindless.use_heap = DXIL_SPV_TRUE;
				vk_binding->counter_binding.root_constant_index = 4;
				vk_binding->counter_binding.bindless.heap_root_offset = binding->d3d_binding.register_index;
				vk_binding->counter_binding.set = 7;
				vk_binding->counter_binding.binding = 0;
			}
			else
			{
				vk_binding->counter_binding.bindless.use_heap = DXIL_SPV_FALSE;
				vk_binding->counter_binding.set = 7;
				vk_binding->counter_binding.binding = binding->d3d_binding.resource_index;
			}

			if (remapper->uav_counter_force_texel_buffer)
				vk_binding->counter_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_TEXEL_BUFFER;
			else if (remapper->uav_counter_force_ssbo)
				vk_binding->counter_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_SSBO;
		}
	}

	return DXIL_SPV_TRUE;
}

static dxil_spv_bool remap_cbv(void *userdata, const dxil_spv_d3d_binding *binding,
                               dxil_spv_cbv_vulkan_binding *vk_binding)
{
	auto *remapper = static_cast<Remapper *>(userdata);
	*vk_binding = {};

	int32_t desc_index = find_root_descriptor_index(remapper, binding, DXIL_SPV_RESOURCE_CLASS_CBV);
	if (desc_index >= 0)
	{
		vk_binding->push_constant = DXIL_SPV_FALSE;
		vk_binding->vulkan.uniform_binding.descriptor_type = DXIL_SPV_VULKAN_DESCRIPTOR_TYPE_BUFFER_DEVICE_ADDRESS;
		vk_binding->vulkan.uniform_binding.root_constant_index = uint32_t(desc_index);
	}
	else
	{
		auto itr = std::find_if(remapper->root_constants.begin(), remapper->root_constants.end(),
		                        [&](const Remapper::RootConstant &root) {
			                        return root.register_space == binding->register_space &&
			                               root.register_index == binding->register_index;
		                        });

		if (itr != remapper->root_constants.end())
		{
			vk_binding->push_constant = DXIL_SPV_TRUE;
			vk_binding->vulkan.push_constant.offset_in_words = itr->word_offset;
		}
		else
		{
			if (d3d_binding_is_global_heap(*binding))
			{
				vk_binding->vulkan.uniform_binding.bindless.use_heap = DXIL_SPV_TRUE;
				vk_binding->vulkan.uniform_binding.set = 0;
				vk_binding->vulkan.uniform_binding.binding = 0;
			}
			else if (remapper->bindless)
			{
				vk_binding->vulkan.uniform_binding.bindless.use_heap = DXIL_SPV_TRUE;
				vk_binding->vulkan.uniform_binding.bindless.heap_root_offset = binding->register_index;
				vk_binding->vulkan.uniform_binding.root_constant_index = 5;
				vk_binding->vulkan.uniform_binding.set = 5;
				vk_binding->vulkan.uniform_binding.binding = 0;
			}
			else
			{
				vk_binding->vulkan.uniform_binding.bindless.use_heap = DXIL_SPV_FALSE;
				vk_binding->vulkan.uniform_binding.set = binding->register_space;
				vk_binding->vulkan.uniform_binding.binding = binding->register_index;
			}
		}
	}

	return DXIL_SPV_TRUE;
}
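// Editor's note: descriptor set convention implemented by the remappers above
// when --bindless is used (derived from the code, not normative):
//   set 0:  SRV images        (root constant index 0)
//   set 1:  SRV buffers       (root constant index 1)
//   set 2:  samplers          (root constant index 2)
//   set 3:  UAV images        (root constant index 3)
//   set 4:  UAV buffers       (root constant index 4)
//   set 5:  CBVs              (root constant index 5)
//   set 7:  UAV counters      (root constant index 4)
//   set 15: offset buffer, binding 0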
static dxil_spv_bool remap_vertex_input(void *userdata, const dxil_spv_d3d_vertex_input *d3d_input,
                                        dxil_spv_vulkan_vertex_input *vk_input)
{
	auto *remapper = static_cast<Remapper *>(userdata);
	auto itr = std::find_if(remapper->vertex_inputs.begin(), remapper->vertex_inputs.end(),
	                        [&](const Remapper::VertexInput &vin) { return vin.semantic == d3d_input->semantic; });

	if (itr != remapper->vertex_inputs.end())
		vk_input->location = itr->index + d3d_input->semantic_index;
	else
		vk_input->location = d3d_input->start_row;

	return DXIL_SPV_TRUE;
}

#ifdef _MSC_VER
#define strcasecmp _stricmp
#endif

static dxil_spv_bool remap_stream_output(void *userdata, const dxil_spv_d3d_stream_output *d3d_output,
                                         dxil_spv_vulkan_stream_output *vk_output)
{
	auto *remapper = static_cast<Remapper *>(userdata);
	auto itr = std::find_if(remapper->stream_outputs.begin(), remapper->stream_outputs.end(),
	                        [&](const Remapper::StreamOutput &vin) {
		                        return strcasecmp(vin.semantic.c_str(), d3d_output->semantic) == 0 &&
		                               vin.index == d3d_output->semantic_index;
	                        });

	if (itr != remapper->stream_outputs.end())
	{
		vk_output->enable = DXIL_SPV_TRUE;
		vk_output->offset = itr->offset;
		vk_output->stride = itr->stride;
		vk_output->buffer_index = itr->buffer_index;
	}
	else
		*vk_output = {};

	return DXIL_SPV_TRUE;
}

int main(int argc, char **argv)
{
	Arguments args;
	Remapper remapper;
	bool local_root_signature = false;
	dxil_spv_begin_thread_allocator_context();

	args.offset_buffer_layout.base.type = DXIL_SPV_OPTION_BINDLESS_OFFSET_BUFFER_LAYOUT;
	args.offset_buffer_layout.untyped_offset = 0;
	args.offset_buffer_layout.typed_offset = 0;
	args.offset_buffer_layout.stride = 1;

	// Begin with identity swizzles.
	args.swizzles.resize(8, 0 | (1 << 2) | (2 << 4) | (3 << 6));

	CLICallbacks cbs;
	cbs.add("--help", [](CLIParser &parser) {
		print_help();
		parser.end();
	});
	cbs.add("--dump-module", [&](CLIParser &) { args.dump_module = true; });
	cbs.add("--glsl", [&](CLIParser &) { args.glsl = true; });
	cbs.add("--asm", [&](CLIParser &) { args.emit_asm = true; });
	cbs.add("--validate", [&](CLIParser &) { args.validate = true; });
	cbs.add("--output", [&](CLIParser &parser) { args.output_path = parser.next_string(); });
	cbs.add("--root-constant", [&](CLIParser &parser) {
		Remapper::RootConstant root = {};
		root.register_space = parser.next_uint();
		root.register_index = parser.next_uint();
		root.word_offset = parser.next_uint();
		unsigned word_count = parser.next_uint();
		remapper.root_constant_word_count = std::max(remapper.root_constant_word_count, word_count + root.word_offset);
		remapper.root_constants.push_back(root);
	});
	cbs.add("--vertex-input", [&](CLIParser &parser) {
		const char *sem = parser.next_string();
		unsigned loc = parser.next_uint();
		remapper.vertex_inputs.push_back({ std::string(sem), loc });
	});
	cbs.add("--stream-output", [&](CLIParser &parser) {
		const char *sem = parser.next_string();
		unsigned index = parser.next_uint();
		unsigned offset = parser.next_uint();
		unsigned stride = parser.next_uint();
		unsigned buffer_index = parser.next_uint();
		remapper.stream_outputs.push_back({ std::string(sem), index, offset, stride, buffer_index });
	});
	cbs.add("--enable-shader-demote", [&](CLIParser &) { args.shader_demote = true; });
	cbs.add("--enable-shader-i8-dot", [&](CLIParser &parser) { args.shader_i8_dot = true; });
	cbs.add("--enable-dual-source-blending", [&](CLIParser &) { args.dual_source_blending = true; });
	cbs.add("--bindless", [&](CLIParser &) {
		remapper.bindless = true;
		remapper.root_constant_word_count = std::max(remapper.root_constant_word_count, 8u);
	});
	cbs.add("--no-bda", [&](CLIParser &) { remapper.bda = false; });
	cbs.add("--uav-counter-force-texel-buffer", [&](CLIParser &) { remapper.uav_counter_force_texel_buffer = true; });
	cbs.add("--uav-counter-force-ssbo", [&](CLIParser &) { remapper.uav_counter_force_ssbo = true; });
	cbs.add("--local-root-signature", [&](CLIParser &) { local_root_signature = true; });
cbs.add("--root-descriptor", [&](CLIParser &parser) { const char *tag = parser.next_string(); uint32_t space = parser.next_uint(); uint32_t register_index = parser.next_uint(); dxil_spv_resource_class resource_class; if (!strcmp(tag, "cbv")) resource_class = DXIL_SPV_RESOURCE_CLASS_CBV; else if (!strcmp(tag, "uav")) resource_class = DXIL_SPV_RESOURCE_CLASS_UAV; else if (!strcmp(tag, "srv")) resource_class = DXIL_SPV_RESOURCE_CLASS_SRV; else { LOGE("Invalid resource class %s, ignoring.\n", tag); return; } remapper.root_descriptors.push_back({ resource_class, space, register_index }); }); cbs.add("--output-rt-swizzle", [&](CLIParser &parser) { unsigned index = parser.next_uint(); if (index >= args.swizzles.size()) { LOGE("RT index out of range.\n"); print_help(); parser.end(); return; } const char *arg = parser.next_string(); if (strlen(arg) != 4) { LOGE("RT swizzle must be 4 characters (x, y, z, w).\n"); print_help(); parser.end(); return; } auto &swiz = args.swizzles[index]; swiz = 0; for (unsigned c = 0; c < 4; c++) { switch (arg[c]) { case 'x': case 'X': case 'r': case 'R': swiz |= 0 << (2 * c); break; case 'y': case 'Y': case 'g': case 'G': swiz |= 1 << (2 * c); break; case 'z': case 'Z': case 'b': case 'B': swiz |= 2 << (2 * c); break; case 'w': case 'W': case 'a': case 'A': swiz |= 3 << (2 * c); break; default: LOGE("Invalid swizzle character %c.\n", arg[c]); print_help(); parser.end(); return; } } }); cbs.add("--root-constant-inline-ubo", [&](CLIParser &parser) { args.root_constant_inline_ubo_desc_set = parser.next_uint(); args.root_constant_inline_ubo_binding = parser.next_uint(); args.root_constant_inline_ubo = true; }); cbs.add("--bindless-cbv-as-ssbo", [&](CLIParser &) { args.bindless_cbv_as_ssbo = true; }); cbs.add("--ssbo-uav", [&](CLIParser &) { remapper.ssbo_uav = true; }); cbs.add("--ssbo-srv", [&](CLIParser &) { remapper.ssbo_srv = true; }); cbs.add("--ssbo-rtas", [&](CLIParser &) { remapper.ssbo_rtas = true; }); cbs.add("--input-attachments", [&](CLIParser &) { remapper.input_attachments = true; }); cbs.add("--ssbo-alignment", [&](CLIParser &parser) { args.ssbo_alignment = parser.next_uint(); }); cbs.add("--typed-uav-read-without-format", [&](CLIParser &) { args.typed_uav_read_without_format = true; }); cbs.add("--bindless-typed-buffer-offsets", [&](CLIParser &) { args.bindless_typed_buffer_offsets = true; }); cbs.add("--bindless-offset-buffer-layout", [&](CLIParser &parser) { args.offset_buffer_layout.untyped_offset = parser.next_uint(); args.offset_buffer_layout.typed_offset = parser.next_uint(); args.offset_buffer_layout.stride = parser.next_uint(); }); cbs.add("--entry", [&](CLIParser &parser) { args.entry_point = parser.next_string(); }); cbs.add("--debug-all-entry-points", [&](CLIParser &parser) { args.debug_all_entry_points = true; }); cbs.add("--storage-input-output-16bit", [&](CLIParser &parser) { args.storage_input_output_16bit = true; }); cbs.add("--descriptor-qa", [&](CLIParser &parser) { args.descriptor_qa = true; args.descriptor_qa_set = parser.next_uint(); args.descriptor_qa_binding = parser.next_uint(); args.shader_hash = uint64_t(strtoull(parser.next_string(), nullptr, 16)); }); cbs.add("--instruction-instrumentation", [&](CLIParser &parser) { args.instruction_instrumentation = true; args.instruction_instrumentation_type = dxil_spv_instruction_instrumentation_type(parser.next_uint()); args.instruction_instrumentation_set = parser.next_uint(); args.instruction_instrumentation_binding = parser.next_uint(); args.shader_hash = 
	cbs.add("--instruction-instrumentation", [&](CLIParser &parser) {
		args.instruction_instrumentation = true;
		args.instruction_instrumentation_type = dxil_spv_instruction_instrumentation_type(parser.next_uint());
		args.instruction_instrumentation_set = parser.next_uint();
		args.instruction_instrumentation_binding = parser.next_uint();
		args.shader_hash = uint64_t(strtoull(parser.next_string(), nullptr, 16));
	});
	cbs.add("--min-precision-native-16bit", [&](CLIParser &) { args.min_precision_native_16bit = true; });
	cbs.add("--raw-llvm", [&](CLIParser &) { args.raw_llvm = true; });
	cbs.add("--use-reflection-names", [&](CLIParser &) { args.use_reflection_names = true; });
	cbs.add("--invariant-position", [&](CLIParser &) { args.invariant_position = true; });
	cbs.add("--robust-physical-cbv-load", [&](CLIParser &) { args.robust_physical_cbv_load = true; });
	cbs.add("--allow-arithmetic-relaxed-precision", [&](CLIParser &) { args.allow_arithmetic_relaxed_precision = true; });
	cbs.add("--physical-address-descriptor-indexing", [&](CLIParser &parser) {
		args.physical_address_indexing_stride = parser.next_uint();
		args.physical_address_indexing_offset = parser.next_uint();
	});
	cbs.add("--nvapi", [&](CLIParser &parser) {
		args.nvapi = true;
		args.nvapi_register_index = parser.next_uint();
		args.nvapi_register_space = parser.next_uint();
	});
	cbs.add("--subgroup-partitioned-nv", [&](CLIParser &) { args.subgroup_partitioned_nv = true; });
	cbs.add("--dead-code-eliminate", [&](CLIParser &) { args.dead_code_eliminate = true; });
	cbs.add("--propagate-precise", [&](CLIParser &) { args.propagate_precise = true; });
	cbs.add("--force-precise", [&](CLIParser &) { args.force_precise = true; });
	cbs.add("--opacity-micromap", [&](CLIParser &) { args.opacity_micromap = true; });
	cbs.add("--force-flatten", [&](CLIParser &) { args.force_flatten = true; });
	cbs.add("--force-loop", [&](CLIParser &) { args.force_loop = true; });
	cbs.add("--force-unroll", [&](CLIParser &) { args.force_unroll = true; });
	cbs.add("--force-branch", [&](CLIParser &) { args.force_branch = true; });
	cbs.add("--subgroup-size", [&](CLIParser &parser) {
		args.subgroup_size_minimum = parser.next_uint();
		args.subgroup_size_maximum = parser.next_uint();
	});
	cbs.add("--descriptor-heap-robustness", [&](CLIParser &) { args.descriptor_heap_robustness = true; });
	cbs.add("--no-compute-shader-derivatives", [&](CLIParser &) { args.compute_shader_derivatives = false; });
	cbs.add("--quad-control-maximal-reconvergence", [&](CLIParser &) { args.quad_control_maximal_reconvergence = true; });
	cbs.add("--force-maximal-reconvergence", [&](CLIParser &) { args.force_maximal_reconvergence = true; });
	cbs.add("--raw-access-chains-nv", [&](CLIParser &) { args.raw_access_chains_nv = true; });
	cbs.add("--extended-robustness", [&](CLIParser &) { args.extended_robustness = true; });
	cbs.add("--vkmm", [&](CLIParser &) { args.vkmm = true; });
	cbs.add("--full-wmma", [&](CLIParser &parser) {
		args.wmma_fp8 = parser.next_uint() != 0;
		args.wmma_nv_coopmat2 = parser.next_uint() != 0;
	});
	cbs.add("--shader-quirk", [&](CLIParser &parser) { args.quirks.push_back(dxil_spv_shader_quirk(parser.next_uint())); });
	cbs.add("--non-semantic", [&](CLIParser &) { args.non_semantic = true; });
	cbs.add("--mixed-float-dot-product", [&](CLIParser &) { args.mixed_float_dot_product = true; });
	cbs.add("--meta-descriptor", [&](CLIParser &parser) {
		MetaDescriptor meta = {};
		meta.meta = dxil_spv_meta_descriptor(parser.next_uint());
		meta.kind = dxil_spv_meta_descriptor_kind(parser.next_uint());
		meta.desc_set = parser.next_uint();
		meta.desc_binding = parser.next_uint();
		args.meta_descriptors.push_back(meta);
	});
	cbs.add("--view-instancing", [&](CLIParser &parser) { args.view_instancing = true; });
	cbs.add("--view-instancing-last-pre-rasterization-stage",
	        [&](CLIParser &parser) { args.view_instancing_last_pre_rasterization_stage = true; });
cbs.add("--view-instance-to-viewport-spec-id", [&](CLIParser &parser) { args.view_instance_to_viewport_spec_id = parser.next_uint(); }); cbs.add("--view-index-to-view-instance-spec-id", [&](CLIParser &parser) { args.view_index_to_view_instance_spec_id = parser.next_uint(); }); cbs.error_handler = [] { print_help(); }; cbs.default_handler = [&](const char *arg) { args.input_path = arg; }; CLIParser cli_parser(std::move(cbs), argc - 1, argv + 1); if (!cli_parser.parse()) return EXIT_FAILURE; else if (cli_parser.is_ended_state()) return EXIT_SUCCESS; if (args.input_path.empty()) { LOGE("No input file.\n"); print_help(); return EXIT_FAILURE; } auto binary = read_file(args.input_path.c_str()); if (binary.empty()) { LOGE("Failed to load file: %s\n", args.input_path.c_str()); return EXIT_FAILURE; } dxil_spv_parsed_blob reflection_blob = nullptr; dxil_spv_parsed_blob blob; if (args.raw_llvm) { if (dxil_spv_parse_dxil(binary.data(), binary.size(), &blob) != DXIL_SPV_SUCCESS) { LOGE("Failed to parse raw LLVM blob.\n"); return EXIT_FAILURE; } } else { if (dxil_spv_parse_dxil_blob(binary.data(), binary.size(), &blob) != DXIL_SPV_SUCCESS) { LOGE("Failed to parse blob.\n"); return EXIT_FAILURE; } } if (args.use_reflection_names) { auto result = dxil_spv_parse_reflection_dxil_blob(binary.data(), binary.size(), &reflection_blob); if (result != DXIL_SPV_SUCCESS && result != DXIL_SPV_ERROR_NO_DATA) { LOGE("Failed to parse blob.\n"); return EXIT_FAILURE; } else if (result == DXIL_SPV_ERROR_NO_DATA) { LOGW("No STAT block found in DXIL blob.\n"); reflection_blob = nullptr; } } if (args.dump_module) dxil_spv_parsed_blob_dump_llvm_ir(blob); dxil_spv_converter converter; if (dxil_spv_create_converter_with_reflection(blob, reflection_blob, &converter) != DXIL_SPV_SUCCESS) return EXIT_FAILURE; dxil_spv_converter_set_srv_remapper(converter, remap_srv, &remapper); dxil_spv_converter_set_sampler_remapper(converter, remap_sampler, &remapper); dxil_spv_converter_set_uav_remapper(converter, remap_uav, &remapper); dxil_spv_converter_set_cbv_remapper(converter, remap_cbv, &remapper); dxil_spv_converter_set_vertex_input_remapper(converter, remap_vertex_input, &remapper); dxil_spv_converter_set_stream_output_remapper(converter, remap_stream_output, &remapper); dxil_spv_converter_set_root_constant_word_count(converter, remapper.root_constant_word_count); dxil_spv_converter_set_root_descriptor_count(converter, remapper.root_descriptors.size()); if (local_root_signature) { dxil_spv_converter_add_local_root_constants(converter, 15, 0, 5); dxil_spv_converter_add_local_root_constants(converter, 15, 1, 6); dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_SRV, 15, 1); dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_UAV, 15, 1); dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_SRV, 15, 2); dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_UAV, 15, 2); dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_CBV, 15, 2); dxil_spv_converter_add_local_root_descriptor_table(converter, DXIL_SPV_RESOURCE_CLASS_SRV, 15, 3, ~0u, 10); dxil_spv_converter_add_local_root_descriptor_table(converter, DXIL_SPV_RESOURCE_CLASS_UAV, 15, 3, ~0u, 11); dxil_spv_converter_add_local_root_descriptor_table(converter, DXIL_SPV_RESOURCE_CLASS_CBV, 15, 3, ~0u, 12); dxil_spv_converter_add_local_root_descriptor_table(converter, DXIL_SPV_RESOURCE_CLASS_SAMPLER, 15, 3, ~0u, 13); dxil_spv_option_sbt_descriptor_size_log2 
	if (local_root_signature)
	{
		dxil_spv_converter_add_local_root_constants(converter, 15, 0, 5);
		dxil_spv_converter_add_local_root_constants(converter, 15, 1, 6);
		dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_SRV, 15, 1);
		dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_UAV, 15, 1);
		dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_SRV, 15, 2);
		dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_UAV, 15, 2);
		dxil_spv_converter_add_local_root_descriptor(converter, DXIL_SPV_RESOURCE_CLASS_CBV, 15, 2);
		dxil_spv_converter_add_local_root_descriptor_table(converter, DXIL_SPV_RESOURCE_CLASS_SRV, 15, 3, ~0u, 10);
		dxil_spv_converter_add_local_root_descriptor_table(converter, DXIL_SPV_RESOURCE_CLASS_UAV, 15, 3, ~0u, 11);
		dxil_spv_converter_add_local_root_descriptor_table(converter, DXIL_SPV_RESOURCE_CLASS_CBV, 15, 3, ~0u, 12);
		dxil_spv_converter_add_local_root_descriptor_table(converter, DXIL_SPV_RESOURCE_CLASS_SAMPLER, 15, 3, ~0u, 13);
		dxil_spv_option_sbt_descriptor_size_log2 desc_size = { { DXIL_SPV_OPTION_SBT_DESCRIPTOR_SIZE_LOG2 }, 6, 5 };
		dxil_spv_converter_add_option(converter, &desc_size.base);
	}

	if (remapper.bindless)
	{
		// Dummy mappings.
		for (uint32_t i = 0; i < 64; i++)
			dxil_spv_converter_add_root_parameter_mapping(converter, i, 4 * i);
	}

	if (args.shader_demote)
	{
		const dxil_spv_option_shader_demote_to_helper helper = { { DXIL_SPV_OPTION_SHADER_DEMOTE_TO_HELPER }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &helper.base);
	}

	if (args.shader_i8_dot)
	{
		const dxil_spv_option_shader_i8_dot helper = { { DXIL_SPV_OPTION_SHADER_I8_DOT }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &helper.base);
	}

	{
		const dxil_spv_option_shader_ray_tracing_primitive_culling helper = {
			{ DXIL_SPV_OPTION_SHADER_RAY_TRACING_PRIMITIVE_CULLING }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &helper.base);
	}

	if (args.dual_source_blending)
	{
		const dxil_spv_option_dual_source_blending helper = { { DXIL_SPV_OPTION_DUAL_SOURCE_BLENDING }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &helper.base);
	}

	const dxil_spv_option_output_swizzle swizzle = { { DXIL_SPV_OPTION_OUTPUT_SWIZZLE },
		                                             args.swizzles.data(), unsigned(args.swizzles.size()) };
	dxil_spv_converter_add_option(converter, &swizzle.base);

	if (args.root_constant_inline_ubo)
	{
		const dxil_spv_option_root_constant_inline_uniform_block inline_block = {
			{ DXIL_SPV_OPTION_ROOT_CONSTANT_INLINE_UNIFORM_BLOCK },
			args.root_constant_inline_ubo_desc_set, args.root_constant_inline_ubo_binding, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &inline_block.base);
	}

	if (args.bindless_cbv_as_ssbo)
	{
		const dxil_spv_option_bindless_cbv_ssbo_emulation cbv = { { DXIL_SPV_OPTION_BINDLESS_CBV_SSBO_EMULATION },
			                                                      DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &cbv.base);
	}

	if (remapper.bindless || !remapper.root_descriptors.empty() || local_root_signature)
	{
		const dxil_spv_option_physical_storage_buffer phys = { { DXIL_SPV_OPTION_PHYSICAL_STORAGE_BUFFER },
			                                                   remapper.bda ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &phys.base);
	}

	{
		dxil_spv_option_ssbo_alignment align = { { DXIL_SPV_OPTION_SSBO_ALIGNMENT }, args.ssbo_alignment };
		dxil_spv_converter_add_option(converter, &align.base);
	}

	{
		dxil_spv_option_typed_uav_read_without_format support = { { DXIL_SPV_OPTION_TYPED_UAV_READ_WITHOUT_FORMAT },
			                                                      args.typed_uav_read_without_format };
		dxil_spv_converter_add_option(converter, &support.base);
	}

	{
		dxil_spv_option_bindless_typed_buffer_offsets offsets = {
			{ DXIL_SPV_OPTION_BINDLESS_TYPED_BUFFER_OFFSETS },
			args.bindless_typed_buffer_offsets ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &offsets.base);
	}

	{
		dxil_spv_option_storage_input_output_16bit storage = {
			{ DXIL_SPV_OPTION_STORAGE_INPUT_OUTPUT_16BIT },
			args.storage_input_output_16bit ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &storage.base);
	}

	{
		const dxil_spv_option_descriptor_qa qa = {
			{ DXIL_SPV_OPTION_DESCRIPTOR_QA },
			args.descriptor_qa ? DXIL_SPV_TRUE : DXIL_SPV_FALSE,
			DXIL_SPV_DESCRIPTOR_QA_INTERFACE_VERSION,
			args.descriptor_qa_set, args.descriptor_qa_binding,
			args.descriptor_qa_set, args.descriptor_qa_binding + 1,
			args.shader_hash };
		dxil_spv_converter_add_option(converter, &qa.base);
	}
	{
		const dxil_spv_option_instruction_instrumentation inst = {
			{ DXIL_SPV_OPTION_INSTRUCTION_INSTRUMENTATION },
			args.instruction_instrumentation ? DXIL_SPV_TRUE : DXIL_SPV_FALSE,
			DXIL_SPV_INSTRUCTION_INSTRUMENTATION_INTERFACE_VERSION,
			args.instruction_instrumentation_set, args.instruction_instrumentation_binding,
			args.instruction_instrumentation_set, args.instruction_instrumentation_binding + 1,
			args.shader_hash,
			args.instruction_instrumentation_type,
		};
		dxil_spv_converter_add_option(converter, &inst.base);
	}

	{
		const dxil_spv_option_min_precision_native_16bit minprec = {
			{ DXIL_SPV_OPTION_MIN_PRECISION_NATIVE_16BIT },
			args.min_precision_native_16bit ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &minprec.base);
	}

	{
		const dxil_spv_option_invariant_position invariant = { { DXIL_SPV_OPTION_INVARIANT_POSITION },
			                                                   args.invariant_position ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &invariant.base);
	}

	{
		const dxil_spv_option_scalar_block_layout scalar = { { DXIL_SPV_OPTION_SCALAR_BLOCK_LAYOUT },
			                                                 DXIL_SPV_TRUE, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &scalar.base);
	}

	{
		const dxil_spv_option_barycentric_khr bary = { { DXIL_SPV_OPTION_BARYCENTRIC_KHR }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &bary.base);
	}

	if (args.robust_physical_cbv_load)
	{
		const dxil_spv_option_robust_physical_cbv_load cbv = { { DXIL_SPV_OPTION_ROBUST_PHYSICAL_CBV_LOAD },
			                                                   DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &cbv.base);
	}

	if (args.allow_arithmetic_relaxed_precision)
	{
		const dxil_spv_option_arithmetic_relaxed_precision relaxed = {
			{ DXIL_SPV_OPTION_ARITHMETIC_RELAXED_PRECISION }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &relaxed.base);
	}

	{
		const dxil_spv_option_physical_address_descriptor_indexing indexing = {
			{ DXIL_SPV_OPTION_PHYSICAL_ADDRESS_DESCRIPTOR_INDEXING },
			args.physical_address_indexing_stride, args.physical_address_indexing_offset };
		dxil_spv_converter_add_option(converter, &indexing.base);
	}

	{
		const dxil_spv_option_denorm_preserve_support denorm = { { DXIL_SPV_OPTION_DENORM_PRESERVE_SUPPORT },
			                                                     DXIL_SPV_TRUE, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &denorm.base);
	}

	{
		const dxil_spv_option_subgroup_partitioned_nv partitioned = {
			{ DXIL_SPV_OPTION_SUBGROUP_PARTITIONED_NV },
			args.subgroup_partitioned_nv ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &partitioned.base);
	}

	{
		const dxil_spv_option_dead_code_eliminate eliminate = {
			{ DXIL_SPV_OPTION_DEAD_CODE_ELIMINATE },
			args.dead_code_eliminate ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &eliminate.base);
	}

	{
		const dxil_spv_option_precise_control precise = {
			{ DXIL_SPV_OPTION_PRECISE_CONTROL },
			args.force_precise ? DXIL_SPV_TRUE : DXIL_SPV_FALSE,
			args.propagate_precise ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &precise.base);
	}

	{
		const dxil_spv_option_opacity_micromap omm = { { DXIL_SPV_OPTION_OPACITY_MICROMAP },
			                                           args.opacity_micromap ? DXIL_SPV_TRUE : DXIL_SPV_FALSE };
		dxil_spv_converter_add_option(converter, &omm.base);
	}
	{
		dxil_spv_option_branch_control branch = { { DXIL_SPV_OPTION_BRANCH_CONTROL } };
		branch.force_flatten = args.force_flatten ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
		branch.force_loop = args.force_loop ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
		branch.force_unroll = args.force_unroll ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
		branch.force_branch = args.force_branch ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
		dxil_spv_converter_add_option(converter, &branch.base);
	}

	{
		const dxil_spv_option_subgroup_properties props = { { DXIL_SPV_OPTION_SUBGROUP_PROPERTIES },
			                                                args.subgroup_size_minimum, args.subgroup_size_maximum };
		dxil_spv_converter_add_option(converter, &props.base);
	}

	if (args.descriptor_heap_robustness)
	{
		const dxil_spv_option_descriptor_heap_robustness robustness = {
			{ DXIL_SPV_OPTION_DESCRIPTOR_HEAP_ROBUSTNESS }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &robustness.base);
	}

	{
		const dxil_spv_option_compute_shader_derivatives derivs = {
			{ DXIL_SPV_OPTION_COMPUTE_SHADER_DERIVATIVES },
			args.compute_shader_derivatives ? DXIL_SPV_TRUE : DXIL_SPV_FALSE,
			args.compute_shader_derivatives ? DXIL_SPV_TRUE : DXIL_SPV_FALSE,
		};
		dxil_spv_converter_add_option(converter, &derivs.base);

		if (args.compute_shader_derivatives)
		{
			const dxil_spv_option_compute_shader_derivatives_quad quad = {
				{ DXIL_SPV_OPTION_COMPUTE_SHADER_DERIVATIVES_QUAD },
				DXIL_SPV_TRUE,
			};
			dxil_spv_converter_add_option(converter, &quad.base);
		}
	}

	{
		dxil_spv_option_quad_control_reconvergence reconv = { { DXIL_SPV_OPTION_QUAD_CONTROL_RECONVERGENCE } };
		reconv.force_maximal_reconvergence = args.force_maximal_reconvergence ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
		reconv.supports_maximal_reconvergence =
		    (args.quad_control_maximal_reconvergence || args.force_maximal_reconvergence) ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
		reconv.supports_quad_control = args.quad_control_maximal_reconvergence;
		dxil_spv_converter_add_option(converter, &reconv.base);
	}

	if (args.raw_access_chains_nv)
	{
		const dxil_spv_option_raw_access_chains_nv chain = { { DXIL_SPV_OPTION_RAW_ACCESS_CHAINS_NV }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &chain.base);
	}

	if (args.extended_robustness)
	{
		dxil_spv_option_extended_robustness robust = { { DXIL_SPV_OPTION_EXTENDED_ROBUSTNESS } };
		robust.robust_constant_lut = DXIL_SPV_TRUE;
		robust.robust_alloca = DXIL_SPV_TRUE;
		dxil_spv_converter_add_option(converter, &robust.base);
	}

	if (args.vkmm)
	{
		dxil_spv_option_vulkan_memory_model vkmm = { { DXIL_SPV_OPTION_VULKAN_MEMORY_MODEL }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &vkmm.base);
	}

	if (args.wmma_fp8 || args.wmma_nv_coopmat2)
	{
		dxil_spv_option_float8_support wmma = { { DXIL_SPV_OPTION_FLOAT8_SUPPORT },
			                                    args.wmma_fp8, args.wmma_nv_coopmat2 };
		dxil_spv_converter_add_option(converter, &wmma.base);
	}

	if (args.nvapi)
	{
		dxil_spv_option_nvapi extn = { { DXIL_SPV_OPTION_NVAPI } };
		extn.enabled = DXIL_SPV_TRUE;
		extn.register_index = args.nvapi_register_index;
		extn.register_space = args.nvapi_register_space;
		dxil_spv_converter_add_option(converter, &extn.base);
	}

	if (args.non_semantic)
	{
		dxil_spv_option_extended_non_semantic sem = { { DXIL_SPV_OPTION_EXTENDED_NON_SEMANTIC }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &sem.base);
	}

	if (args.mixed_float_dot_product)
	{
		dxil_spv_option_mixed_float_dot_product mixed = { { DXIL_SPV_OPTION_MIXED_FLOAT_DOT_PRODUCT }, DXIL_SPV_TRUE };
		dxil_spv_converter_add_option(converter, &mixed.base);
	}

	if (args.view_instancing)
	{
		dxil_spv_option_view_instancing inst = { { DXIL_SPV_OPTION_VIEW_INSTANCING } };
		inst.enabled = DXIL_SPV_TRUE;
		inst.last_pre_rasterization_stage = args.view_instancing_last_pre_rasterization_stage;
		inst.view_index_to_view_instance_spec_id = args.view_index_to_view_instance_spec_id;
		inst.view_instance_to_viewport_spec_id = args.view_instance_to_viewport_spec_id;
		dxil_spv_converter_add_option(converter, &inst.base);
	}
	for (auto &quirk : args.quirks)
	{
		dxil_spv_option_shader_quirk helper = { { DXIL_SPV_OPTION_SHADER_QUIRK }, quirk };
		dxil_spv_converter_add_option(converter, &helper.base);
	}

	for (auto &meta : args.meta_descriptors)
		dxil_spv_converter_set_meta_descriptor(converter, meta.meta, meta.kind, meta.desc_set, meta.desc_binding);

	dxil_spv_converter_add_option(converter, &args.offset_buffer_layout.base);

	unsigned num_entry_points = 1;
	if (args.debug_all_entry_points)
		dxil_spv_parsed_blob_get_num_entry_points(blob, &num_entry_points);

	std::string final_output;

	for (unsigned entry_point = 0; entry_point < num_entry_points; entry_point++)
	{
		const char *demangled_entry = nullptr;
		if (args.debug_all_entry_points)
		{
			dxil_spv_parsed_blob_get_entry_point_demangled_name(blob, entry_point, &demangled_entry);
			dxil_spv_converter_set_entry_point(converter, demangled_entry);
		}
		else if (!args.entry_point.empty())
			dxil_spv_converter_set_entry_point(converter, args.entry_point.c_str());

		if (dxil_spv_converter_run(converter) != DXIL_SPV_SUCCESS)
		{
			LOGE("Failed to convert DXIL to SPIR-V.\n");
			return EXIT_FAILURE;
		}

		dxil_spv_compiled_spirv compiled;
		if (dxil_spv_converter_get_compiled_spirv(converter, &compiled) != DXIL_SPV_SUCCESS)
			return EXIT_FAILURE;

		unsigned heuristic_min_wave_size = 0;
		unsigned heuristic_max_wave_size = 0;
		unsigned wave_size_min = 0;
		unsigned wave_size_max = 0;
		unsigned wave_size_preferred = 0;
		dxil_spv_converter_get_compute_wave_size_range(converter, &wave_size_min, &wave_size_max, &wave_size_preferred);
		dxil_spv_converter_get_compute_heuristic_min_wave_size(converter, &heuristic_min_wave_size);
		dxil_spv_converter_get_compute_heuristic_max_wave_size(converter, &heuristic_max_wave_size);

		if (args.validate)
		{
			if (!validate_spirv(compiled.data, compiled.size))
			{
				LOGE("Failed to validate SPIR-V.\n");
				return EXIT_FAILURE;
			}
		}
		std::string spirv_asm_string;
		if (args.emit_asm || (!args.glsl && args.output_path.empty()))
		{
			if (wave_size_min)
			{
				spirv_asm_string += "// WaveSize(";
				spirv_asm_string += std::to_string(wave_size_min);
				if (wave_size_max || wave_size_preferred)
					spirv_asm_string += "," + std::to_string(wave_size_max ? wave_size_max : wave_size_min);
				if (wave_size_preferred)
					spirv_asm_string += "," + std::to_string(wave_size_preferred);
				spirv_asm_string += ")\n";
			}

			if (heuristic_min_wave_size)
			{
				spirv_asm_string += "// HeuristicWaveSizeMin(";
				spirv_asm_string += std::to_string(heuristic_min_wave_size);
				spirv_asm_string += ")\n";
			}

			if (heuristic_max_wave_size)
			{
				spirv_asm_string += "// HeuristicWaveSize(";
				spirv_asm_string += std::to_string(heuristic_max_wave_size);
				spirv_asm_string += ")\n";
			}

			dxil_spv_bool compat;
			if (dxil_spv_converter_is_multiview_compatible(converter, &compat) == DXIL_SPV_SUCCESS && compat)
				spirv_asm_string += "// MultiviewCompatible\n";

			if (demangled_entry && !args.glsl)
			{
				spirv_asm_string += "// ========== ";
				spirv_asm_string += demangled_entry;
				spirv_asm_string += " ==========\n";
			}
			spirv_asm_string += convert_to_asm(compiled.data, compiled.size);
			if (demangled_entry && !args.glsl)
				spirv_asm_string += "// ==================\n";
		}

		if (args.glsl)
		{
			auto compiled_glsl = convert_to_glsl(compiled.data, compiled.size);
			if (compiled_glsl.empty())
			{
				LOGE("Failed to convert to GLSL.\n");
				return EXIT_FAILURE;
			}

			const char *warn = dxil_spv_converter_get_analysis_warnings(converter);
			if (warn && *warn != '\0')
			{
				compiled_glsl += "/* WARNINGS:\n";
				compiled_glsl += warn;
				compiled_glsl += "*/\n\n";
			}

			if (!spirv_asm_string.empty())
			{
				compiled_glsl += "\n#if 0\n";
				compiled_glsl += "// SPIR-V disassembly\n";
				compiled_glsl += spirv_asm_string;
				compiled_glsl += "#endif";
			}

			std::string output;
			if (demangled_entry)
			{
				output += "// ========= ";
				output += demangled_entry;
				output += " =========\n";
				output += compiled_glsl;
				output += "\n// =================\n";
			}
			else
				output = std::move(compiled_glsl);

			final_output += output;
		}
		else if (args.emit_asm || args.output_path.empty())
		{
			final_output += spirv_asm_string;
		}
		else
		{
			if (demangled_entry)
			{
				LOGE("Cannot emit binary output when using debug-all-entry-points.\n");
				return EXIT_FAILURE;
			}

			FILE *file = fopen(args.output_path.c_str(), "wb");
			if (file)
			{
				if (fwrite(compiled.data, 1, compiled.size, file) != compiled.size)
				{
					LOGE("Failed to write SPIR-V.\n");
					return EXIT_FAILURE;
				}
				fclose(file);
			}
			else
				LOGE("Failed to open %s.\n", args.output_path.c_str());
		}
	}

	// Dump debug output of RDAT objects if we have them.
	if (args.glsl || args.emit_asm)
	{
		unsigned num_subobjects = dxil_spv_parsed_blob_get_num_rdat_subobjects(blob);
		if (num_subobjects > 0)
		{
			final_output += "\n#if 0\n==== RDAT ====\n";
			for (unsigned i = 0; i < num_subobjects; i++)
			{
				dxil_spv_rdat_subobject obj;
				dxil_spv_parsed_blob_get_rdat_subobject(blob, i, &obj);
				switch (obj.kind)
				{
				case DXIL_SPV_RDAT_SUBOBJECT_KIND_STATE_OBJECT_CONFIG:
					final_output += "StateObjectConfig ";
					final_output += obj.subobject_name;
					final_output += " = { flags = ";
					final_output += std::to_string(obj.args[0]);
					final_output += " };\n";
					break;

				case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_SHADER_CONFIG:
					final_output += "ShaderConfig ";
					final_output += obj.subobject_name;
					final_output += " = { maxPayloadSize = ";
					final_output += std::to_string(obj.args[0]);
					final_output += ", maxAttributeSize = ";
					final_output += std::to_string(obj.args[1]);
					final_output += " };\n";
					break;

				case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG:
				case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1:
					final_output += "RaytracingPipelineConfig1 ";
					final_output += obj.subobject_name;
					final_output += " = { maxRecursion = ";
					final_output += std::to_string(obj.args[0]);
					final_output += ", flags = ";
					final_output += std::to_string(obj.args[1]);
					final_output += " };\n";
					break;

				case DXIL_SPV_RDAT_SUBOBJECT_KIND_GLOBAL_ROOT_SIGNATURE:
				case DXIL_SPV_RDAT_SUBOBJECT_KIND_LOCAL_ROOT_SIGNATURE:
					final_output += obj.kind == DXIL_SPV_RDAT_SUBOBJECT_KIND_GLOBAL_ROOT_SIGNATURE ?
					                "GlobalRootSignature " : "LocalRootSignature ";
					final_output += obj.subobject_name;
					final_output += " = { ";
					final_output += std::to_string(obj.payload_size);
					final_output += " bytes };\n";
					break;
"TriangleHitGroup " : "ProceduralHitGroup "; final_output += obj.subobject_name; assert(obj.num_exports == 3); final_output += " = { ahit = \""; final_output += obj.exports[0]; final_output += "\", chit = \""; final_output += obj.exports[1]; final_output += "\", intersection = \""; final_output += obj.exports[2]; final_output += "\" };\n"; break; case DXIL_SPV_RDAT_SUBOBJECT_KIND_SUBOBJECT_TO_EXPORTS_ASSOCIATION: final_output += "SubobjectToExportsAssociation "; final_output += obj.subobject_name; final_output += " = { "; assert(obj.num_exports >= 1); final_output += obj.exports[0]; final_output += ", { "; for (unsigned j = 1; j < obj.num_exports; j++) { final_output += obj.exports[j]; if (j + 1 < obj.num_exports) final_output += ", "; } final_output += " } };\n"; break; default: break; } } final_output += "============\n#endif"; } } if (args.output_path.empty()) { printf("%s\n", final_output.c_str()); } else if (!final_output.empty()) { FILE *file = fopen(args.output_path.c_str(), "w"); if (!file) { LOGE("Failed to open %s for writing.\n", args.output_path.c_str()); return EXIT_FAILURE; } fprintf(file, "%s\n", final_output.c_str()); fclose(file); } dxil_spv_converter_free(converter); dxil_spv_parsed_blob_free(blob); if (reflection_blob) dxil_spv_parsed_blob_free(reflection_blob); dxil_spv_end_thread_allocator_context(); return EXIT_SUCCESS; } ================================================ FILE: dxil_spirv_c.cpp ================================================ /* Copyright (c) 2019-2022 Hans-Kristian Arntzen for Valve Corporation * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#define DXIL_SPV_ENABLE_EXPERIMENTAL_WORKGRAPHS
#define DXIL_SPV_ENABLE_EXPERIMENTAL_MULTIVIEW

#include "thread_local_allocator.hpp"
#include "dxil_spirv_c.h"
#include "dxil_converter.hpp"
#include "dxil_parser.hpp"
#include "llvm_bitcode_parser.hpp"
#include "logging.hpp"
#include "spirv_module.hpp"
#include <memory>
#include <new>

using namespace dxil_spv;

void dxil_spv_get_version(unsigned *major, unsigned *minor, unsigned *patch)
{
	*major = DXIL_SPV_API_VERSION_MAJOR;
	*minor = DXIL_SPV_API_VERSION_MINOR;
	*patch = DXIL_SPV_API_VERSION_PATCH;
}

struct dxil_spv_parsed_blob_s
{
	LLVMBCParser bc;
#ifdef HAVE_LLVMBC
	String disasm;
#else
	std::string disasm;
#endif
	Vector<uint8_t> dxil_blob;
	Vector<RDATSubobject> rdat_subobjects;

	struct EntryPoint
	{
		String mangled;
		String demangled;
		NodeInputData node_input;
		Vector<NodeOutputData> node_outputs;
	};
	Vector<EntryPoint> entry_points;
};

struct Remapper : ResourceRemappingInterface
{
	static void copy_buffer_binding(VulkanBinding &vk_binding, const dxil_spv_vulkan_binding &c_vk_binding)
	{
		vk_binding.descriptor_set = c_vk_binding.set;
		vk_binding.binding = c_vk_binding.binding;
		vk_binding.root_constant_index = c_vk_binding.root_constant_index;
		vk_binding.bindless.use_heap = bool(c_vk_binding.bindless.use_heap);
		vk_binding.bindless.heap_root_offset = c_vk_binding.bindless.heap_root_offset;
		vk_binding.descriptor_type = static_cast<VulkanDescriptorType>(c_vk_binding.descriptor_type);
	}

	bool remap_srv(const D3DBinding &binding, VulkanSRVBinding &vk_binding) override
	{
		if (srv_remapper)
		{
			const dxil_spv_d3d_binding c_binding = {
				static_cast<dxil_spv_shader_stage>(binding.stage),
				static_cast<dxil_spv_resource_kind>(binding.kind),
				binding.resource_index,
				binding.register_space,
				binding.register_index,
				binding.range_size,
				binding.alignment,
			};

			dxil_spv_srv_vulkan_binding c_vk_binding = {};
			if (srv_remapper(srv_userdata, &c_binding, &c_vk_binding) == DXIL_SPV_TRUE)
			{
				copy_buffer_binding(vk_binding.buffer_binding, c_vk_binding.buffer_binding);
				copy_buffer_binding(vk_binding.offset_binding, c_vk_binding.offset_binding);
				return true;
			}
			else
				return false;
		}
		else
		{
			vk_binding.buffer_binding.bindless.use_heap = false;
			vk_binding.buffer_binding.descriptor_set = binding.register_space;
			vk_binding.buffer_binding.binding = binding.register_index;
			vk_binding.buffer_binding.descriptor_type = VulkanDescriptorType::Identity;
			vk_binding.offset_binding = {};
			return true;
		}
	}

	bool remap_sampler(const D3DBinding &binding, VulkanBinding &vk_binding) override
	{
		if (sampler_remapper)
		{
			const dxil_spv_d3d_binding c_binding = {
				static_cast<dxil_spv_shader_stage>(binding.stage),
				static_cast<dxil_spv_resource_kind>(binding.kind),
				binding.resource_index,
				binding.register_space,
				binding.register_index,
				binding.range_size,
				binding.alignment,
			};

			dxil_spv_vulkan_binding c_vk_binding = {};
			if (sampler_remapper(sampler_userdata, &c_binding, &c_vk_binding) == DXIL_SPV_TRUE)
			{
				copy_buffer_binding(vk_binding, c_vk_binding);
				return true;
			}
			else
				return false;
		}
		else
		{
			vk_binding.bindless.use_heap = false;
			vk_binding.descriptor_set = binding.register_space;
			vk_binding.binding = binding.register_index;
			vk_binding.descriptor_type = VulkanDescriptorType::Identity;
			return true;
		}
	}

	bool remap_uav(const D3DUAVBinding &binding, VulkanUAVBinding &vk_binding) override
	{
		if (uav_remapper)
		{
			const dxil_spv_uav_d3d_binding c_binding = {
				{
					static_cast<dxil_spv_shader_stage>(binding.binding.stage),
					static_cast<dxil_spv_resource_kind>(binding.binding.kind),
					binding.binding.resource_index,
					binding.binding.register_space,
					binding.binding.register_index,
					binding.binding.range_size,
					binding.binding.alignment,
				},
				binding.counter ? DXIL_SPV_TRUE : DXIL_SPV_FALSE,
			};

			dxil_spv_uav_vulkan_binding c_vk_binding = {};
			if (uav_remapper(uav_userdata, &c_binding, &c_vk_binding) == DXIL_SPV_TRUE)
			{
				copy_buffer_binding(vk_binding.buffer_binding, c_vk_binding.buffer_binding);
				copy_buffer_binding(vk_binding.counter_binding, c_vk_binding.counter_binding);
				copy_buffer_binding(vk_binding.offset_binding, c_vk_binding.offset_binding);
				return true;
			}
			else
				return false;
		}
		else
		{
			vk_binding.buffer_binding.bindless.use_heap = false;
			vk_binding.counter_binding.bindless.use_heap = false;
			vk_binding.buffer_binding.descriptor_set = binding.binding.register_space;
			vk_binding.buffer_binding.binding = binding.binding.register_index;
			vk_binding.counter_binding.descriptor_set = binding.binding.register_space + 1;
			vk_binding.counter_binding.binding = binding.binding.register_index;
			vk_binding.buffer_binding.descriptor_type = VulkanDescriptorType::Identity;
			vk_binding.counter_binding.descriptor_type = VulkanDescriptorType::Identity;
			vk_binding.offset_binding = {};
			return true;
		}
	}

	bool remap_cbv(const D3DBinding &binding, VulkanCBVBinding &vk_binding) override
	{
		if (cbv_remapper)
		{
			const dxil_spv_d3d_binding c_binding = {
				static_cast<dxil_spv_shader_stage>(binding.stage),
				static_cast<dxil_spv_resource_kind>(binding.kind),
				binding.resource_index,
				binding.register_space,
				binding.register_index,
				binding.range_size,
				binding.alignment,
			};

			dxil_spv_cbv_vulkan_binding c_vk_binding = {};
			if (cbv_remapper(cbv_userdata, &c_binding, &c_vk_binding) == DXIL_SPV_TRUE)
			{
				vk_binding.push_constant = c_vk_binding.push_constant;
				if (vk_binding.push_constant)
					vk_binding.push.offset_in_words = c_vk_binding.vulkan.push_constant.offset_in_words;
				else
					copy_buffer_binding(vk_binding.buffer, c_vk_binding.vulkan.uniform_binding);
				return true;
			}
			else
				return false;
		}
		else
		{
			vk_binding.buffer.bindless.use_heap = false;
			vk_binding.buffer.descriptor_set = binding.register_space;
			vk_binding.buffer.binding = binding.register_index;
			vk_binding.buffer.descriptor_type = VulkanDescriptorType::Identity;
			return true;
		}
	}

	bool remap_vertex_input(const D3DStageIO &d3d_input, VulkanStageIO &vk_input) override
	{
		dxil_spv_d3d_vertex_input c_input = { d3d_input.semantic, d3d_input.semantic_index,
		                                      d3d_input.start_row, d3d_input.rows };
		dxil_spv_vulkan_vertex_input c_vk_input = {};

		if (input_remapper)
		{
			if (input_remapper(input_userdata, &c_input, &c_vk_input) == DXIL_SPV_TRUE)
			{
				vk_input.location = c_vk_input.location;
				vk_input.component = 0;
				return true;
			}
			else
				return false;
		}
		else
		{
			vk_input.location = d3d_input.start_row;
			return true;
		}
	}

	bool remap_stream_output(const D3DStreamOutput &d3d_output, VulkanStreamOutput &vk_output) override
	{
		dxil_spv_d3d_stream_output c_output = { d3d_output.semantic, d3d_output.semantic_index };
		dxil_spv_vulkan_stream_output c_vk_output = {};

		if (output_remapper)
		{
			if (output_remapper(output_userdata, &c_output, &c_vk_output) == DXIL_SPV_TRUE)
			{
				vk_output.enable = bool(c_vk_output.enable);
				vk_output.offset = c_vk_output.offset;
				vk_output.stride = c_vk_output.stride;
				vk_output.buffer_index = c_vk_output.buffer_index;
				return true;
			}
			else
				return false;
		}
		else
		{
			return true;
		}
	}

	bool remap_stage_input(const D3DStageIO &d3d_input, VulkanStageIO &vk_input) override
	{
		dxil_spv_d3d_shader_stage_io c_input = { d3d_input.semantic, d3d_input.semantic_index };
		dxil_spv_vulkan_shader_stage_io c_vk_input = { vk_input.location, vk_input.component, vk_input.flags };

		if (stage_input_remapper)
		{
			if (stage_input_remapper(stage_input_userdata, &c_input, &c_vk_input) == DXIL_SPV_TRUE)
			{
				vk_input.location = c_vk_input.location;
				vk_input.component = c_vk_input.component;
				vk_input.flags = c_vk_input.flags;
				return true;
			}
			else
				return false;
		}
		else
		{
			return true;
		}
	}

	bool has_nontrivial_stage_input_remapping() override
	{
		return stage_input_remapper != nullptr;
	}

	bool remap_stage_output(const D3DStageIO &d3d_output, VulkanStageIO &vk_output) override
	{
		dxil_spv_d3d_shader_stage_io c_output = { d3d_output.semantic, d3d_output.semantic_index };
		dxil_spv_vulkan_shader_stage_io c_vk_output = { vk_output.location, vk_output.component, vk_output.flags };

		if (stage_output_remapper)
		{
			if (stage_output_remapper(stage_output_userdata, &c_output, &c_vk_output) == DXIL_SPV_TRUE)
			{
				vk_output.location = c_vk_output.location;
				vk_output.component = c_vk_output.component;
				vk_output.flags = c_vk_output.flags;
				return true;
			}
			else
				return false;
		}
		else
		{
			return true;
		}
	}

	unsigned get_root_constant_word_count() override
	{
		return root_constant_word_count;
	}

	unsigned get_root_descriptor_count() override
	{
		return root_descriptor_count;
	}

	dxil_spv_srv_remapper_cb srv_remapper = nullptr;
	void *srv_userdata = nullptr;
	dxil_spv_sampler_remapper_cb sampler_remapper = nullptr;
	void *sampler_userdata = nullptr;
	dxil_spv_uav_remapper_cb uav_remapper = nullptr;
	void *uav_userdata = nullptr;
	dxil_spv_cbv_remapper_cb cbv_remapper = nullptr;
	void *cbv_userdata = nullptr;
	dxil_spv_vertex_input_remapper_cb input_remapper = nullptr;
	void *input_userdata = nullptr;
	dxil_spv_stream_output_remapper_cb output_remapper = nullptr;
	void *output_userdata = nullptr;
	dxil_spv_shader_stage_io_remapper_cb stage_input_remapper = nullptr;
	void *stage_input_userdata = nullptr;
	dxil_spv_shader_stage_io_remapper_cb stage_output_remapper = nullptr;
	void *stage_output_userdata = nullptr;
	unsigned root_constant_word_count = 0;
	unsigned root_descriptor_count = 0;
};

enum class LocalRootParameterType
{
	Constants,
	Descriptor,
	Table
};

struct LocalConstants
{
	unsigned register_space;
	unsigned register_index;
	unsigned num_words;
};

struct LocalDescriptor
{
	ResourceClass resource_class;
	unsigned register_space;
	unsigned register_index;
};

struct LocalRootParameter
{
	LocalRootParameterType type;
	LocalConstants local_constants;
	LocalDescriptor local_descriptor;
	Vector<DescriptorTableEntry> table_entries;
};

struct dxil_spv_converter_s
{
	dxil_spv_converter_s(LLVMBCParser &bc_parser_, LLVMBCParser *bc_reflection_parser_)
	    : bc_parser(bc_parser_), bc_reflection_parser(bc_reflection_parser_)
	{
	}

	LLVMBCParser &bc_parser;
	LLVMBCParser *bc_reflection_parser;
	Vector<uint32_t> spirv;
	String entry_point;
	String compiled_entry_point;
	String analysis_warnings;
	Remapper remapper;
	Vector<LocalRootParameter> local_root_parameters;
	Vector<std::unique_ptr<OptionBase>> options;

	struct MetaDescriptorMapping
	{
		MetaDescriptor meta;
		MetaDescriptorKind kind;
		uint32_t desc_set;
		uint32_t desc_binding;
	};
	Vector<MetaDescriptorMapping> meta_mappings;

	Vector<DescriptorTableEntry> local_entries;
	bool active_table = false;
	bool uses_subgroup_size = false;
	bool is_multiview_compatible = false;
	uint32_t workgroup_size[3] = {};
	uint32_t patch_vertex_count = 0;
	uint32_t patch_location_offset = UINT32_MAX;
	uint32_t wave_size_min = 0;
	uint32_t wave_size_max = 0;
	uint32_t wave_size_preferred = 0;
	uint32_t heuristic_min_wave_size = 0;
	uint32_t heuristic_max_wave_size = 0;
	Vector<std::pair<uint32_t, uint32_t>> root_parameter_mappings;
	Vector<String> non_semantic_debug_info;
	bool shader_feature_used[unsigned(ShaderFeature::Count)] = {};
};
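// Sketch of a client-side SRV remapper compatible with the callback hooks in
// Remapper above (illustrative only; my_srv_remapper is hypothetical, and a
// real client would apply its own root signature layout rather than this
// identity-style mapping):
#if 0
static dxil_spv_bool my_srv_remapper(void *userdata, const dxil_spv_d3d_binding *d3d,
                                     dxil_spv_srv_vulkan_binding *vk)
{
	// Place t# registers in descriptor set 1, keyed by register index.
	vk->buffer_binding.set = 1;
	vk->buffer_binding.binding = d3d->register_index;
	return DXIL_SPV_TRUE;
}
#endif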
dxil_spv_result dxil_spv_parse_dxil_blob(const void *data, size_t size, dxil_spv_parsed_blob *blob)
{
	auto *parsed = new (std::nothrow) dxil_spv_parsed_blob_s;
	if (!parsed)
		return DXIL_SPV_ERROR_OUT_OF_MEMORY;

	DXILContainerParser parser;
	if (!parser.parse_container(data, size, false))
	{
		delete parsed;
		return DXIL_SPV_ERROR_PARSER;
	}

	parsed->dxil_blob = std::move(parser.get_blob());
	parsed->rdat_subobjects = std::move(parser.get_rdat_subobjects());

	bool success;
	if (parser.is_dxbc_binary())
		success = parsed->bc.parseDXBCBinary(data, size);
	else
		success = parsed->bc.parse(parsed->dxil_blob.data(), parsed->dxil_blob.size());

	if (!success)
	{
		delete parsed;
		return DXIL_SPV_ERROR_PARSER;
	}

	auto names = Converter::get_entry_points(parsed->bc);
	for (auto &name : names)
	{
		parsed->entry_points.push_back({ name, demangle_entry_point(name),
		                                 Converter::get_node_input(parsed->bc, name.c_str()),
		                                 Converter::get_node_outputs(parsed->bc, name.c_str()) });
	}

	*blob = parsed;
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_parse_reflection_dxil_blob(const void *data, size_t size, dxil_spv_parsed_blob *blob)
{
	auto *parsed = new (std::nothrow) dxil_spv_parsed_blob_s;
	if (!parsed)
		return DXIL_SPV_ERROR_OUT_OF_MEMORY;

	DXILContainerParser parser;
	if (!parser.parse_container(data, size, true))
	{
		delete parsed;
		return DXIL_SPV_ERROR_PARSER;
	}

	if (parser.get_blob().empty())
	{
		delete parsed;
		return DXIL_SPV_ERROR_NO_DATA;
	}

	parsed->dxil_blob = std::move(parser.get_blob());
	if (!parsed->bc.parse(parsed->dxil_blob.data(), parsed->dxil_blob.size()))
	{
		delete parsed;
		return DXIL_SPV_ERROR_PARSER;
	}

	*blob = parsed;
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_parse_dxil(const void *data, size_t size, dxil_spv_parsed_blob *blob)
{
	auto *parsed = new (std::nothrow) dxil_spv_parsed_blob_s;
	if (!parsed)
		return DXIL_SPV_ERROR_OUT_OF_MEMORY;

	if (!parsed->bc.parse(data, size))
	{
		delete parsed;
		return DXIL_SPV_ERROR_PARSER;
	}

	auto names = Converter::get_entry_points(parsed->bc);
	for (auto &name : names)
	{
		parsed->entry_points.push_back({ name, demangle_entry_point(name),
		                                 Converter::get_node_input(parsed->bc, name.c_str()),
		                                 Converter::get_node_outputs(parsed->bc, name.c_str()) });
	}

	*blob = parsed;
	return DXIL_SPV_SUCCESS;
}

void dxil_spv_parsed_blob_dump_llvm_ir(dxil_spv_parsed_blob blob)
{
	auto &module = blob->bc.get_module();
#ifdef HAVE_LLVMBC
	String str;
	if (llvm::disassemble(module, str))
		fprintf(stderr, "%s\n", str.c_str());
	else
		fprintf(stderr, "Failed to disassemble LLVM IR!\n");
#else
	module.print(llvm::errs(), nullptr);
#endif
}

dxil_spv_result dxil_spv_parsed_blob_get_disassembled_ir(dxil_spv_parsed_blob blob, const char **str)
{
	blob->disasm.clear();
	auto *module = &blob->bc.get_module();
#ifdef HAVE_LLVMBC
	if (!llvm::disassemble(*module, blob->disasm))
		return DXIL_SPV_ERROR_GENERIC;
#else
	llvm::raw_string_ostream ostr(blob->disasm);
	module->print(ostr, nullptr);
#endif
	*str = blob->disasm.c_str();
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_parsed_blob_get_raw_ir(dxil_spv_parsed_blob blob, const void **data, size_t *size)
{
	if (blob->dxil_blob.empty())
		return DXIL_SPV_ERROR_GENERIC;
	*data = blob->dxil_blob.data();
	*size = blob->dxil_blob.size();
	return DXIL_SPV_SUCCESS;
}

dxil_spv_shader_stage dxil_spv_parsed_blob_get_shader_stage(dxil_spv_parsed_blob blob)
{
	return static_cast<dxil_spv_shader_stage>(Converter::get_shader_stage(blob->bc));
}

dxil_spv_shader_stage dxil_spv_parsed_blob_get_shader_stage_for_entry(dxil_spv_parsed_blob blob, const char *entry)
{
	return static_cast<dxil_spv_shader_stage>(Converter::get_shader_stage(blob->bc, entry));
}
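// Sketch of a parse + disassemble round trip using the functions above
// (illustrative; data/size and error handling are the caller's concern):
#if 0
dxil_spv_parsed_blob blob = nullptr;
if (dxil_spv_parse_dxil_blob(data, size, &blob) == DXIL_SPV_SUCCESS)
{
	const char *ir = nullptr;
	if (dxil_spv_parsed_blob_get_disassembled_ir(blob, &ir) == DXIL_SPV_SUCCESS)
		fprintf(stderr, "%s\n", ir); // Valid until the blob is freed or re-disassembled.
	dxil_spv_parsed_blob_free(blob);
}
#endif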
dxil_spv_result dxil_spv_parsed_blob_get_entry_index_by_name(dxil_spv_parsed_blob blob, const char *entry, unsigned *index)
{
	for (size_t i = 0, n = blob->entry_points.size(); i < n; i++)
	{
		if (blob->entry_points[i].demangled == entry || blob->entry_points[i].mangled == entry)
		{
			*index = unsigned(i);
			return DXIL_SPV_SUCCESS;
		}
	}

	return DXIL_SPV_ERROR_GENERIC;
}

dxil_spv_result dxil_spv_parsed_blob_get_num_entry_points(dxil_spv_parsed_blob blob, unsigned *count)
{
	*count = unsigned(blob->entry_points.size());
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_parsed_blob_get_entry_point_name(dxil_spv_parsed_blob blob, unsigned index, const char **mangled_entry)
{
	if (index >= blob->entry_points.size())
		return DXIL_SPV_ERROR_INVALID_ARGUMENT;
	*mangled_entry = blob->entry_points[index].mangled.c_str();
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_parsed_blob_get_entry_point_demangled_name(dxil_spv_parsed_blob blob, unsigned index, const char **demangled_entry)
{
	if (index >= blob->entry_points.size())
		return DXIL_SPV_ERROR_INVALID_ARGUMENT;
	*demangled_entry = blob->entry_points[index].demangled.c_str();
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_parsed_blob_get_entry_point_node_input(
		dxil_spv_parsed_blob blob, unsigned index, dxil_spv_node_input_data *data)
{
	if (index >= blob->entry_points.size())
		return DXIL_SPV_ERROR_INVALID_ARGUMENT;

	auto &input = blob->entry_points[index].node_input;
	data->node_id = input.node_id.c_str();
	data->payload_stride = input.payload_stride;
	data->launch_type = dxil_spv_node_launch_type(input.launch_type);
	data->node_array_index = input.node_array_index;
	data->dispatch_grid_offset = input.grid_buffer.offset;
	data->dispatch_grid_type_bits = input.grid_buffer.component_type == DXIL::ComponentType::U32 ? 32 : 16;
	data->dispatch_grid_components = input.grid_buffer.count;

	for (int i = 0; i < 3; i++)
	{
		data->broadcast_grid[i] = input.broadcast_grid[i];
		data->thread_group_size_spec_id[i] = input.thread_group_size_spec_id[i];
		data->max_broadcast_grid_spec_id[i] = input.max_broadcast_grid_spec_id[i];
	}

	data->recursion_factor = input.recursion_factor;
	data->coalesce_factor = input.coalesce_factor;
	data->node_share_input_id = input.node_share_input_id.c_str();
	data->node_share_input_array_index = input.node_share_input_array_index;
	data->local_root_arguments_table_index = input.local_root_arguments_table_index;
	data->is_indirect_bda_stride_program_entry_spec_id = input.is_indirect_bda_stride_program_entry_spec_id;
	data->is_entry_point_spec_id = input.is_entry_point_spec_id;
	data->dispatch_grid_is_upper_bound = input.dispatch_grid_is_upper_bound ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
	data->dispatch_grid_is_upper_bound_spec_id = input.dispatch_grid_is_upper_bound_spec_id;
	data->is_static_broadcast_node_spec_id = input.is_static_broadcast_node_spec_id;
	data->node_track_rw_input_sharing = input.node_track_rw_input_sharing ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
	data->is_program_entry = input.is_program_entry ? DXIL_SPV_TRUE : DXIL_SPV_FALSE;
	return DXIL_SPV_SUCCESS;
}
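// Sketch of enumerating the entry points in a parsed blob using the
// functions above (illustrative; assumes blob was parsed successfully):
#if 0
unsigned count = 0;
dxil_spv_parsed_blob_get_num_entry_points(blob, &count);
for (unsigned i = 0; i < count; i++)
{
	const char *mangled = nullptr, *demangled = nullptr;
	dxil_spv_parsed_blob_get_entry_point_name(blob, i, &mangled);
	dxil_spv_parsed_blob_get_entry_point_demangled_name(blob, i, &demangled);
	fprintf(stderr, "Entry %u: %s (%s)\n", i, demangled, mangled);
}
#endif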
dxil_spv_result dxil_spv_parsed_blob_get_entry_point_num_node_outputs(
		dxil_spv_parsed_blob blob, unsigned index, unsigned *num_outputs)
{
	if (index >= blob->entry_points.size())
		return DXIL_SPV_ERROR_INVALID_ARGUMENT;
	*num_outputs = unsigned(blob->entry_points[index].node_outputs.size());
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_parsed_blob_get_entry_point_node_output(
		dxil_spv_parsed_blob blob, unsigned index, unsigned output_index, dxil_spv_node_output_data *data)
{
	if (index >= blob->entry_points.size())
		return DXIL_SPV_ERROR_INVALID_ARGUMENT;
	auto &entry = blob->entry_points[index];
	if (output_index >= entry.node_outputs.size())
		return DXIL_SPV_ERROR_INVALID_ARGUMENT;

	auto &output = entry.node_outputs[output_index];
	data->node_id = output.node_id.c_str();
	data->node_array_index = output.node_array_index;
	data->node_array_size = output.node_array_size;
	data->sparse_array = output.sparse_array;
	data->max_records = output.max_records;
	data->node_index_spec_constant_id = output.node_index_spec_constant_id;
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_parsed_blob_scan_resources(dxil_spv_parsed_blob blob,
                                                    dxil_spv_srv_remapper_cb srv_remapper,
                                                    dxil_spv_sampler_remapper_cb sampler_remapper,
                                                    dxil_spv_cbv_remapper_cb cbv_remapper,
                                                    dxil_spv_uav_remapper_cb uav_remapper,
                                                    void *userdata)
{
	Remapper remapper;
	remapper.srv_remapper = srv_remapper;
	remapper.srv_userdata = userdata;
	remapper.sampler_remapper = sampler_remapper;
	remapper.sampler_userdata = userdata;
	remapper.cbv_remapper = cbv_remapper;
	remapper.cbv_userdata = userdata;
	remapper.uav_remapper = uav_remapper;
	remapper.uav_userdata = userdata;
	Converter::scan_resources(&remapper, blob->bc);
	return DXIL_SPV_SUCCESS;
}

void dxil_spv_parsed_blob_free(dxil_spv_parsed_blob blob)
{
	delete blob;
}
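// Sketch of a resource pre-scan: the same remapper callbacks used during
// conversion can be invoked up front to discover D3D bindings without
// generating SPIR-V. Unset callbacks fall back to the identity mapping in
// Remapper above (illustrative; my_srv_remapper and my_state are
// hypothetical client code):
#if 0
dxil_spv_parsed_blob_scan_resources(blob, my_srv_remapper, nullptr, nullptr, nullptr, &my_state);
#endif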
dxil_spv_result dxil_spv_create_converter_with_reflection(dxil_spv_parsed_blob blob,
                                                          dxil_spv_parsed_blob reflection_blob,
                                                          dxil_spv_converter *converter)
{
	auto *conv = new (std::nothrow) dxil_spv_converter_s(blob->bc, reflection_blob ? &reflection_blob->bc : nullptr);
	if (!conv)
		return DXIL_SPV_ERROR_OUT_OF_MEMORY;
	*converter = conv;
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_create_converter(dxil_spv_parsed_blob blob, dxil_spv_converter *converter)
{
	return dxil_spv_create_converter_with_reflection(blob, nullptr, converter);
}

void dxil_spv_converter_free(dxil_spv_converter converter)
{
	delete converter;
}

void dxil_spv_converter_set_entry_point(dxil_spv_converter converter, const char *entry_point)
{
	if (entry_point)
		converter->entry_point = entry_point;
	else
		converter->entry_point.clear();
}
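// Minimal conversion sketch built only from the entry points above and
// dxil_spv_converter_run() / dxil_spv_converter_get_compiled_spirv() below
// (illustrative; "main" is an example entry name, error handling elided):
#if 0
dxil_spv_converter conv = nullptr;
dxil_spv_create_converter(blob, &conv);
dxil_spv_converter_set_entry_point(conv, "main");
if (dxil_spv_converter_run(conv) == DXIL_SPV_SUCCESS)
{
	dxil_spv_compiled_spirv spirv = {};
	dxil_spv_converter_get_compiled_spirv(conv, &spirv);
	// spirv.data / spirv.size (in bytes) remain valid until the converter is freed.
}
dxil_spv_converter_free(conv);
#endif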
dxil_spv_result dxil_spv_converter_run(dxil_spv_converter converter)
{
	SPIRVModule module;
	Converter dxil_converter(converter->bc_parser, converter->bc_reflection_parser, module);

	if (!converter->entry_point.empty())
		dxil_converter.set_entry_point(converter->entry_point.c_str());

	dxil_converter.set_resource_remapping_interface(&converter->remapper);
	for (auto &opt : converter->options)
		dxil_converter.add_option(*opt);

	if (converter->patch_location_offset != UINT32_MAX)
		dxil_converter.set_patch_location_offset(converter->patch_location_offset);

	for (auto &mapping : converter->root_parameter_mappings)
		dxil_converter.add_root_parameter_mapping(mapping.first, mapping.second);
	for (auto &info : converter->non_semantic_debug_info)
		dxil_converter.add_non_semantic_debug_info(info);
	for (auto &mapping : converter->meta_mappings)
		dxil_converter.set_meta_descriptor(mapping.meta, mapping.kind, mapping.desc_set, mapping.desc_binding);

	for (auto &local_param : converter->local_root_parameters)
	{
		switch (local_param.type)
		{
		case LocalRootParameterType::Constants:
			dxil_converter.add_local_root_constants(local_param.local_constants.register_space,
			                                        local_param.local_constants.register_index,
			                                        local_param.local_constants.num_words);
			break;

		case LocalRootParameterType::Descriptor:
			dxil_converter.add_local_root_descriptor(local_param.local_descriptor.resource_class,
			                                         local_param.local_descriptor.register_space,
			                                         local_param.local_descriptor.register_index);
			break;

		case LocalRootParameterType::Table:
			dxil_converter.add_local_root_descriptor_table(local_param.table_entries);
			break;
		}
	}

	auto entry_point = dxil_converter.convert_entry_point();
	if (entry_point.entry.entry == nullptr)
	{
		LOGE("Failed to convert function.\n");
		return DXIL_SPV_ERROR_GENERIC;
	}

	{
		dxil_spv::CFGStructurizer structurizer(entry_point.entry.entry, *entry_point.node_pool, module);
		module.set_entry_build_point(entry_point.entry.func);
		if (entry_point.entry.is_structured)
			structurizer.run_trivial();
		else
			structurizer.run();
		module.emit_entry_point_function_body(structurizer);
	}

	for (auto &leaf : entry_point.leaf_functions)
	{
		if (!leaf.entry)
		{
			LOGE("Leaf function is nullptr!\n");
			return DXIL_SPV_ERROR_GENERIC;
		}

		dxil_spv::CFGStructurizer structurizer(leaf.entry, *entry_point.node_pool, module);
		module.set_entry_build_point(leaf.func);
		if (leaf.is_structured)
			structurizer.run_trivial();
		else
			structurizer.run();
		module.emit_leaf_function_body(leaf.func, structurizer);
	}

	if (!module.finalize_spirv(converter->spirv))
	{
		LOGE("Failed to finalize SPIR-V.\n");
		return DXIL_SPV_ERROR_GENERIC;
	}

	converter->compiled_entry_point = dxil_converter.get_compiled_entry_point();
	converter->uses_subgroup_size = module.has_builtin_shader_input(spv::BuiltInSubgroupSize);
	converter->is_multiview_compatible = dxil_converter.is_multiview_compatible();
	dxil_converter.get_workgroup_dimensions(converter->workgroup_size[0],
	                                        converter->workgroup_size[1],
	                                        converter->workgroup_size[2]);
	dxil_converter.get_compute_wave_size_range(converter->wave_size_min,
	                                           converter->wave_size_max,
	                                           converter->wave_size_preferred);
	converter->heuristic_min_wave_size = dxil_converter.get_compute_heuristic_min_wave_size();
	converter->heuristic_max_wave_size = dxil_converter.get_compute_heuristic_max_wave_size();
	converter->patch_vertex_count = dxil_converter.get_patch_vertex_count();
	converter->patch_location_offset = dxil_converter.get_patch_location_offset();

	for (int i = 0; i < int(ShaderFeature::Count); i++)
		converter->shader_feature_used[i] = dxil_converter.shader_requires_feature(ShaderFeature(i));

	converter->analysis_warnings = dxil_converter.get_analysis_warnings();
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_converter_get_compiled_spirv(dxil_spv_converter converter, dxil_spv_compiled_spirv *compiled)
{
	if (converter->spirv.empty())
		return DXIL_SPV_ERROR_GENERIC;
	compiled->data = converter->spirv.data();
	compiled->size = converter->spirv.size() * sizeof(uint32_t);
	return DXIL_SPV_SUCCESS;
}

dxil_spv_result dxil_spv_converter_get_compiled_entry_point(dxil_spv_converter converter, const char **entry_point)
{
	if (converter->spirv.empty())
		return DXIL_SPV_ERROR_GENERIC;
	*entry_point = converter->compiled_entry_point.c_str();
	return DXIL_SPV_SUCCESS;
}

void dxil_spv_converter_set_srv_remapper(dxil_spv_converter converter, dxil_spv_srv_remapper_cb remapper, void *userdata)
{
	converter->remapper.srv_remapper = remapper;
	converter->remapper.srv_userdata = userdata;
}

void dxil_spv_converter_set_sampler_remapper(dxil_spv_converter converter, dxil_spv_sampler_remapper_cb remapper, void *userdata)
{
	converter->remapper.sampler_remapper = remapper;
	converter->remapper.sampler_userdata = userdata;
}

void dxil_spv_converter_set_root_constant_word_count(dxil_spv_converter converter, unsigned num_words)
{
	converter->remapper.root_constant_word_count = num_words;
}

void dxil_spv_converter_set_root_descriptor_count(dxil_spv_converter converter, unsigned count)
{
	converter->remapper.root_descriptor_count = count;
}

void dxil_spv_converter_set_uav_remapper(dxil_spv_converter converter, dxil_spv_uav_remapper_cb remapper, void *userdata)
{
	converter->remapper.uav_remapper = remapper;
	converter->remapper.uav_userdata = userdata;
}

void dxil_spv_converter_set_cbv_remapper(dxil_spv_converter converter, dxil_spv_cbv_remapper_cb remapper, void *userdata)
{
	converter->remapper.cbv_remapper = remapper;
	converter->remapper.cbv_userdata = userdata;
}

void dxil_spv_converter_set_stage_input_remapper(dxil_spv_converter converter, dxil_spv_shader_stage_io_remapper_cb remapper, void *userdata)
{
	converter->remapper.stage_input_remapper = remapper;
	converter->remapper.stage_input_userdata = userdata;
}

void dxil_spv_converter_set_stage_output_remapper(dxil_spv_converter converter, dxil_spv_shader_stage_io_remapper_cb remapper, void *userdata)
{
	converter->remapper.stage_output_remapper = remapper;
	converter->remapper.stage_output_userdata = userdata;
}

void dxil_spv_converter_set_vertex_input_remapper(dxil_spv_converter converter, dxil_spv_vertex_input_remapper_cb remapper, void *userdata)
{
	converter->remapper.input_remapper = remapper;
	converter->remapper.input_userdata = userdata;
}

void dxil_spv_converter_set_stream_output_remapper(dxil_spv_converter converter, dxil_spv_stream_output_remapper_cb remapper, void *userdata)
{
	converter->remapper.output_remapper = remapper;
	converter->remapper.output_userdata = userdata;
}
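// Sketch of wiring remappers and root constants before running the converter
// (illustrative; my_srv_remapper and my_state are hypothetical client code,
// and 16 is an arbitrary example word count):
#if 0
dxil_spv_converter_set_srv_remapper(conv, my_srv_remapper, &my_state);
dxil_spv_converter_set_root_constant_word_count(conv, 16);
dxil_spv_converter_run(conv);
#endif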
/* Useful to check if the implementation recognizes a particular capability for ABI compatibility. */
dxil_spv_bool dxil_spv_converter_supports_option(dxil_spv_option cap)
{
	return Converter::recognizes_option(static_cast